Cantera  2.0
csvdiff.cpp
1 /*
2  * csvdiff File1.csv File2.csv
3  *
4  * Compares the variable values in two Excel formatted
5  * comma separated files.
6  * The comparison is done using a weighted norm basis.
7  *
8  * The two files should be basically equal. However, File1.csv is
9  * taken as the reference file, that has precedence, when there is
10  * something to be decided upon.
11  *
12  * Arguments:
13  * -h = prints this usage information
14  *
15  * Shell Return Values
16  * 0 = Comparison was successful
17  * 1 = One or more nodal values failed the comparison
18  * 2 = One or more of the header values failed the comparison
19  * 3 = Apples to oranges, the files can not even be compared against
20  * one another.
21  */
22 
23 #include <stdio.h>
24 #include <cstdlib>
25 #include <string.h>
26 #include <math.h>
27 #include <float.h>
28 #include <limits.h>
29 #include "cantera/base/config.h"
30 #ifndef _MSC_VER
31 #include <unistd.h>
32 #else
33 #include <string>
34 #endif
35 using namespace std;
36 
37 #if defined(__CYGWIN__)
38 #include <getopt.h>
39 #endif
40 
41 #include "mdp_allo.h"
42 //#include "cantera/base/mdp_allo.h"
43 #include "tok_input_util.h"
44 #ifndef MAX
45 # define MAX(x,y) (( (x) > (y) ) ? (x) : (y))
46 #endif
47 #ifndef MIN
48 # define MIN(x,y) (( (x) < (y) ) ? (x) : (y))
49 #endif
50 
51 
/* When non-zero, main() also prints the checks that passed, not only the failures. */
52 int Debug_Flag = true;
/* Relative tolerance used by the comparisons; main() overwrites it from the -r option. */
53 double grtol = 1.0E-3;
/* Absolute tolerance used by the comparisons; main() overwrites it from the -a option. */
54 double gatol = 1.0E-9;
55 
/* Values main() stores in testPassed and returns to the shell. */
56 #define RT_PASSED 0
57 #define RT_FAILED_COL 1
58 #define RT_FAILED_HDR 2
59 #define RT_FAILED_OTHER 3
60 
61 /*
 * Line-buffer length: the readers in this file allocate Max_Input_Str_Ln + 1
 * characters for each scan line. Initialized from MAX_INPUT_STR_LN, which is
 * not defined in this file.
 */
64 int Max_Input_Str_Ln = MAX_INPUT_STR_LN;
65 /*****************************************************************************/
66 /*****************************************************************************/
67 /*****************************************************************************/
68 
69 #ifdef _MSC_VER
70 /*
71  * Windows doesn't have getopt(). This is an incomplete version that
72  * does enough to handle required functionality.
73  */
int optind = -1;
char* optarg = 0;

/*
 * getopt():
 *    Minimal stand-in for the POSIX routine, sufficient for csvdiff.
 *
 *    Each call returns the next option character found in argv. Tokens that
 *    start with '-' (or "--") and are longer than the dashes are treated as
 *    option clusters; every other token is an operand and is skipped.
 *
 * Arguments:
 *    argc, argv -> command line, as passed to main()
 *    optstring  -> option letters; a letter followed by ':' takes an argument
 *
 * Globals set:
 *    optarg -> argument of the option just returned (rest of the token, or
 *              the following argv element), otherwise 0
 *    optind -> index of the first operand encountered; set to argc if the
 *              command line is exhausted without finding one
 *
 * Return:
 *    the option character (returned as-is even when it is not listed in
 *    optstring), '?' when a required argument is missing, or -1 when there
 *    are no more options.
 */
int getopt(int argc, char** argv, const char* optstring)
{
    static int currArg = 1;  /* argv index of the token being scanned */
    static int charPos = 0;  /* next character inside that token; 0 = need a new token */

    optarg = 0;

    if (charPos == 0) {
        /* Advance to the next option token, noting the first operand seen. */
        for (; currArg < argc; currArg++) {
            const char* cand = argv[currArg];
            size_t candLen = strlen(cand);
            if (candLen > 1 && cand[0] == '-') {
                charPos = (candLen > 2 && cand[1] == '-') ? 2 : 1;
                break;
            }
            if (optind == -1) {
                optind = currArg;
            }
        }
        if (currArg >= argc) {
            /*
             * Nothing left. Report "no operands" the POSIX way so that a
             * caller comparing optind with argc-2 does not match by accident.
             */
            if (optind == -1) {
                optind = argc;
            }
            return -1;
        }
    }

    const char* tok = argv[currArg];
    size_t len = strlen(tok);
    int rc = tok[charPos];
    int lastChar = (charPos >= (int) len - 1);

    const char* spec = 0;
    if (optstring && rc != ':') {
        spec = strchr(optstring, rc);
    }

    if (spec && spec[1] == ':') {
        /* Option takes an argument: rest of this token, else the next one. */
        if (!lastChar) {
            optarg = argv[currArg] + charPos + 1;
        } else if (currArg + 1 < argc) {
            currArg++;
            optarg = argv[currArg];
        } else {
            rc = '?';
        }
        charPos = 0;
        currArg++;
    } else if (!lastChar) {
        /* More option letters remain in this cluster. */
        charPos++;
    } else {
        /* Token finished: move on so the same option is not returned again. */
        charPos = 0;
        currArg++;
    }
    return rc;
}
132 
133 #endif
134 
135 /*****************************************************************************/
136 /*****************************************************************************/
137 /*****************************************************************************/
138 
/*
 * diff_double():
 *    Fuzzy inequality test for two doubles.
 *
 *    The allowed gap is atol plus rtol times the mean magnitude of the two
 *    values. Returns 1 when |d1 - d2| exceeds that gap, 0 otherwise.
 */
static int diff_double(double d1, double d2, double rtol, double atol)
{
    const double gap = fabs(d1 - d2);
    const double allowed = atol + rtol * 0.5 * (fabs(d1) + fabs(d2));
    return (gap > allowed) ? 1 : 0;
}
151 
/*
 * diff_double_slope():
 *    Fuzzy inequality test for two doubles with a slope-dependent allowance.
 *
 *    On top of the atol / rtol allowance used by diff_double(), an extra
 *    absolute allowance of xtol * (|slope1| + |slope2|) is granted.
 *    Returns 1 when |d1 - d2| exceeds the total allowance, 0 otherwise.
 */
static int diff_double_slope(double d1, double d2, double rtol,
                             double atol, double xtol, double slope1, double slope2)
{
    const double slopeAllowance = xtol * (fabs(slope1) + fabs(slope2));
    const double allowed = atol + slopeAllowance + rtol * 0.5 * (fabs(d1) + fabs(d2));
    if (fabs(d1 - d2) <= allowed) {
        return 0;
    }
    /* Also reached when the comparison above is false because of a NaN-free excess. */
    return (fabs(d1 - d2) > allowed) ? 1 : 0;
}
166 
167 /*****************************************************************************/
168 /*****************************************************************************/
169 /*****************************************************************************/
170 
/*
 * calc_rdiff():
 *    Calculates the relative difference of two doubles using the same fuzzy
 *    scale as diff_double():
 *
 *        |d1 - d2| / (atol + rtol * 0.5 * (|d1| + |d2|))
 *
 *    A result greater than 1 means the values are outside of tolerance.
 *    Identical values always yield 0, even when the scale itself is zero
 *    (d1 == d2 == 0 with atol == 0), which would otherwise produce 0/0.
 */
static double calc_rdiff(double d1, double d2, double rtol, double atol)
{
    const double absDiff = fabs(d1 - d2);
    const double scale = atol + rtol * 0.5 * (fabs(d1) + fabs(d2));
    if (absDiff == 0.0) {
        return 0.0;
    }
    return absDiff / scale;
}
183 
184 /*****************************************************************************/
/*
 * breakStrCommas():
 *    This routine breaks a character string into stringlets according to
 *    the placement of commas. Each comma that is used as a separator is
 *    replaced by a null character.
 *
 * Arguments:
 *    str       => original string. On exit, this string will have been
 *                 altered.
 *    strlets   -> Vector of pointers to char *. The vector must have room
 *                 for at least maxPieces entries (and at least one entry).
 *    maxPieces -> largest number of pieces to divide the string into. Once
 *                 that many pieces exist, any remaining commas are left in
 *                 the last piece.
 *
 * Return:
 *    The number of pieces that the string was actually broken into. This is
 *    always at least 1, including when str or strlets is NULL.
 */
static int breakStrCommas(char* str, char** strlets, int maxPieces)
{
    int numbreaks = 0;
    if (!strlets) {
        return 1;
    }
    strlets[0] = str;
    if (!str) {
        return 1;
    }
    char* cptr = str;
    /*
     * Check the limit BEFORE splitting: a request for a single piece must
     * never write strlets[1].
     */
    while (numbreaks < maxPieces - 1) {
        char* comma = strchr(cptr, (int) ',');
        if (!comma) {
            break;
        }
        *comma = '\0';
        cptr = comma + 1;
        numbreaks++;
        strlets[numbreaks] = cptr;
    }
    return numbreaks + 1;
}
224 
225 /*****************************************************************************/
226 /*****************************************************************************/
227 /*****************************************************************************/
228 #define LT_NULLLINE 0
229 #define LT_TITLELINE 1
230 #define LT_COLTITLE 2
231 #define LT_DATALINE 3
232 /*
233  * get_sizes()
234  *
235  * This routine obtains the sizes of the various elements of the file
236  * by parsing the file.
237  * (HKM: Note, this file could use some work. However, it's always
238  * going to be heuristic)
239  *
240  * Arguments:
241  *
242  * fp = File pointer
243  * nTitleLines = Number of title lines
244  * nColTitleLines = Number of column title lines
245  * nCol = Number of columns -> basically equal to the
246  * number of variabless
247  * nDataRows = Number of rows of data in the file
248  *
249  */
250 
251 static void get_sizes(FILE* fp, int& nTitleLines, int& nColTitleLines,
252  int& nCol, int& nDataRows, int** ColIsFloat_ptr)
253 {
254  int nScanLinesMAX = 100;
255  int nScanLines = nScanLinesMAX;
256  int retn, i, j;
257  int maxCommas = 0;
258  TOKEN fieldToken;
259  char* scanLine = mdp_alloc_char_1(MAX_INPUT_STR_LN+1, '\0');
260  int* numCommas = mdp_alloc_int_1(nScanLinesMAX, -1);
261  int* ColIsFloat = *ColIsFloat_ptr;
262 
263  /*
264  * Rewind the file
265  */
266  rewind(fp);
267  /*
268  * Read the scan lines
269  */
270  for (i = 0; i < nScanLinesMAX; i++) {
271  retn = read_line(fp, scanLine, 0);
272  if (retn == -1) {
273  nScanLines = i;
274  break;
275  }
276  /*
277  * Strip a trailing comma from the scanline -
278  * -> These are not significant
279  */
280  int ccount = static_cast<int>(strlen(scanLine));
281  if (ccount > 0) {
282  if (scanLine[ccount-1] == ',') {
283  scanLine[ccount-1] = '\0';
284  }
285  }
286  /*
287  * Count the number of commas in the line
288  */
289  char* cptr = scanLine;
290  char* cetn = NULL;
291  numCommas[i] = 0;
292  do {
293  cetn = strchr(cptr, (int) ',');
294  if (cetn) {
295  numCommas[i]++;
296  cptr = cetn + 1;
297  }
298  } while (cetn);
299  if (i > 1) {
300  if (maxCommas < numCommas[i]) {
301  maxCommas = numCommas[i];
302  }
303  }
304  }
305  /*
306  * set a preliminary value of nCol
307  */
308  nCol = maxCommas + 1;
309  if (nScanLines == 0) {
310  nCol = 0;
311  }
312  char** strlets = (char**) mdp_alloc_ptr_1(maxCommas+1);
313 
314  /*
315  * Figure out if each column is a text or float
316  */
317  rewind(fp);
318  for (i = 0; i < nScanLines; i++) {
319  retn = read_line(fp, scanLine, 0);
320  int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
321  if (ncolsFound == (maxCommas + 1)) {
322  for (j = 0; j < ncolsFound; j++) {
323  char* fieldStr = strlets[j];
324  fillTokStruct(&fieldToken, fieldStr);
325  if (fieldToken.ntokes != 1) {
326  break;
327  }
328  bool rerr = false;
329  (void) tok_to_double(&fieldToken, DBL_MAX,
330  -DBL_MAX, 0.0, &rerr);
331  if (!rerr) {
332  ColIsFloat[j] = true;
333  }
334  }
335 
336  }
337  }
338 
339 
340 
341  int doingLineType = LT_TITLELINE;
342  rewind(fp);
343  for (i = 0; i < nScanLines; i++) {
344  retn = read_line(fp, scanLine, 0);
345  /*
346  * Strip a trailing comma from the scanline -
347  * -> These are not significant
348  */
349  int ccount = static_cast<int>(strlen(scanLine));
350  if (ccount > 0) {
351  if (scanLine[ccount-1] == ',') {
352  scanLine[ccount-1] = '\0';
353  }
354  }
355  int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
356 
357  if (doingLineType == LT_TITLELINE) {
358  if (numCommas[i] == maxCommas) {
359  doingLineType = LT_COLTITLE;
360  nTitleLines = i;
361  }
362  }
363 
364  if (doingLineType == LT_COLTITLE) {
365  bool goodDataLine = true;
366  bool rerr = false;
367  for (j = 0; j < ncolsFound; j++) {
368  char* fieldStr = strlets[j];
369  fillTokStruct(&fieldToken, fieldStr);
370  if (fieldToken.ntokes != 1) {
371  goodDataLine = false;
372  break;
373  }
374  if ((ColIsFloat[j]) == 1) {
375  (void) tok_to_double(&fieldToken, DBL_MAX,
376  -DBL_MAX, 0.0, &rerr);
377  if (rerr) {
378  goodDataLine = false;
379  break;
380  }
381  }
382  }
383  if (goodDataLine) {
384  doingLineType = LT_DATALINE;
385  }
386  nColTitleLines = i - nTitleLines;
387  }
388  if (doingLineType == LT_DATALINE) {
389  break;
390  }
391  }
392 
393 
394  /*
395  * Count the total number of lines in the file
396  */
397  if (doingLineType == LT_DATALINE) {
398  for (i = nColTitleLines + nTitleLines; ; i++) {
399  retn = read_line(fp, scanLine, 0);
400  if (retn == -1) {
401  nDataRows = i - nColTitleLines - nTitleLines + 1;
402  break;
403  }
404  /*
405  * Strip a trailing comma from the scanline -
406  * -> These are not significant
407  */
408  int ccount = static_cast<int>(strlen(scanLine));
409  if (ccount > 0) {
410  if (scanLine[ccount-1] == ',') {
411  scanLine[ccount-1] = '\0';
412  }
413  }
414  int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
415  bool goodDataLine = true;
416  bool rerr = false;
417  for (j = 0; j < ncolsFound; j++) {
418  char* fieldStr = strlets[j];
419  fillTokStruct(&fieldToken, fieldStr);
420  if (fieldToken.ntokes != 1) {
421  goodDataLine = false;
422  break;
423  }
424  if (ColIsFloat[j] == 1) {
425  (void) tok_to_double(&fieldToken, DBL_MAX,
426  -DBL_MAX, 0.0, &rerr);
427  if (rerr) {
428  goodDataLine = false;
429  break;
430  }
431  }
432  }
433  if (! goodDataLine) {
434  doingLineType = LT_NULLLINE;
435  nDataRows = i - nColTitleLines - nTitleLines + 1;
436  break;
437  }
438  }
439  }
440  mdp_safe_free((void**) &strlets);
441  mdp_safe_free((void**) &scanLine);
442  mdp_safe_free((void**) &numCommas);
443  return;
444 }
445 
446 /*****************************************************************************/
447 /*****************************************************************************/
448 /*****************************************************************************/
449 
450 static void
451 read_title(FILE* fp, char** *title, int nTitleLines)
452 {
453  int retn;
454  *title = (char**) mdp_alloc_ptr_1(nTitleLines);
455  char* scanLine = mdp_alloc_char_1(Max_Input_Str_Ln + 1, '\0');
456  for (int i = 0; i < nTitleLines ; i++) {
457  retn = read_line(fp, scanLine, 0);
458  if (retn >= 0) {
459  /*
460  * Strip a trailing comma from the scanline -
461  * -> These are not significant
462  */
463  int ccount = static_cast<int>(strlen(scanLine));
464  if (ccount > 0) {
465  if (scanLine[ccount-1] == ',') {
466  scanLine[ccount-1] = '\0';
467  }
468  }
469  (*title)[i] = mdp_copy_string(scanLine);
470  }
471  }
472  mdp_safe_free((void**) &scanLine);
473 }
474 
475 /*****************************************************************************/
476 /*****************************************************************************/
477 /*****************************************************************************/
478 
479 static void
480 read_colTitle(FILE* fp, char**** ColMLNames_ptr, int nColTitleLines, int nCol)
481 {
482  int retn, j;
483  *ColMLNames_ptr = (char***) mdp_alloc_ptr_1(nCol);
484  char** *ColMLNames = *ColMLNames_ptr;
485  char* scanLine = mdp_alloc_char_1(Max_Input_Str_Ln + 1, '\0');
486  char** strlets = (char**) mdp_alloc_ptr_1(nCol+1);
487  if (nColTitleLines > 0) {
488  for (int i = 0; i < nColTitleLines ; i++) {
489  retn = read_line(fp, scanLine, 0);
490  if (retn >= 0) {
491  /*
492  * Strip a trailing comma from the scanline -
493  * -> These are not significant
494  */
495  int ccount = static_cast<int>(strlen(scanLine));
496  if (ccount > 0) {
497  if (scanLine[ccount-1] == ',') {
498  scanLine[ccount-1] = '\0';
499  }
500  }
501  int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
502  ColMLNames[i] = mdp_alloc_VecFixedStrings(nCol, MAX_TOKEN_STR_LN+1);
503  for (j = 0; j < ncolsFound; j++) {
504  strip(strlets[j]);
505  strcpy(ColMLNames[i][j], strlets[j]);
506  }
507  }
508  }
509  } else {
510  ColMLNames[0] = mdp_alloc_VecFixedStrings(nCol, MAX_TOKEN_STR_LN+1);
511  for (j = 0; j < nCol; j++) {
512  char cbuff[256];
513  sprintf(cbuff, "Col_%d", j+1);
514  strcpy(ColMLNames[0][j], cbuff);
515  }
516  }
517  mdp_safe_free((void**) &scanLine);
518  mdp_safe_free((void**) &strlets);
519 }
520 
521 /*****************************************************************************/
522 /*****************************************************************************/
523 /*****************************************************************************/
524 
/*
 * get_atol():
 *    Scales an absolute tolerance to the magnitude of a column of values.
 *
 *    Returns (rms + 1) * atol, where rms is the root mean square of the
 *    nvals entries of values. With no values the rms is taken as zero, so
 *    the tolerance passed in is returned unchanged.
 *
 * Arguments:
 *    values = vector of column values (not read when nvals <= 0)
 *    nvals  = number of entries in values
 *    atol   = base absolute tolerance
 */
static double get_atol(const double* values, const int nvals,
                       const double atol)
{
    if (nvals <= 0) {
        return atol;
    }
    double sumSq = 0.0;
    for (int i = 0; i < nvals; i++) {
        sumSq += values[i] * values[i];
    }
    const double rms = sqrt(sumSq / nvals);
    return ((rms + 1.0) * atol);
}
541 
542 /*****************************************************************************/
543 /*****************************************************************************/
544 /*****************************************************************************/
545 
546 static void
547 read_values(FILE* fp, double** NVValues, char** *NSValues, int nCol, int nDataRows,
548  int* ColIsFloat)
549 {
550  char** strlets = (char**) mdp_alloc_ptr_1(nCol+1);
551  char* scanLine = mdp_alloc_char_1(Max_Input_Str_Ln + 1, '\0');
552  TOKEN fieldToken;
553  double value;
554  int retn, j;
555  for (int i = 0; i < nDataRows; i++) {
556  retn = read_line(fp, scanLine, 0);
557  if (retn == -1) {
558  break;
559  }
560  /*
561  * Strip a trailing comma from the scanline -
562  * -> These are not significant
563  */
564  int ccount = static_cast<int>(strlen(scanLine));
565  if (ccount > 0) {
566  if (scanLine[ccount-1] == ',') {
567  scanLine[ccount-1] = '\0';
568  }
569  }
570  int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
571  bool goodDataLine = true;
572  bool rerr = false;
573  for (j = 0; j < ncolsFound; j++) {
574  char* fieldStr = strlets[j];
575  NSValues[j][i] = mdp_copy_string(strlets[j]);
576  fillTokStruct(&fieldToken, fieldStr);
577  if (fieldToken.ntokes != 1) {
578  goodDataLine = false;
579  break;
580  }
581  if (ColIsFloat[j]) {
582  value = tok_to_double(&fieldToken, DBL_MAX,
583  -DBL_MAX, 0.0, &rerr);
584  if (rerr) {
585  goodDataLine = false;
586  break;
587  }
588  NVValues[j][i] = value;
589  }
590  }
591  if (! goodDataLine) {
592  break;
593  }
594  }
595  mdp_safe_free((void**) &strlets);
596  mdp_safe_free((void**) &scanLine);
597 }
598 /*****************************************************************************/
599 /*****************************************************************************/
600 /*****************************************************************************/
601 
/*
 * print_usage():
 *    Writes the command line synopsis, the option summary and the list of
 *    shell return values to standard output.
 */
static void print_usage()
{
    static const char* const usageText[] = {
        "\t\n",
        " csvdiff [-h] [-a atol] [-r rtol] File1.csv File2.csv\n",
        "\t\n",
        "\tCompares the variable values in two Excel formatted comma separated files.\n",
        "\tThe comparison is done using a weighted norm basis.\n",
        "\t\n",
        "\tThe two files should be basically equal. However, File1.csv is\n",
        "\ttaken as the reference file that has precedence, when there is\n",
        "\tsomething to be decided upon.\n",
        "\t\n",
        "\t Arguments:\n",
        "\t -h = Usage info\n",
        "\t -a atol = Set absolute tolerance parameter - default = 1.0E-9\n",
        "\t -r rtol = Set relative tolerance parameter - default = 1.0E-3\n",
        "\t\n",
        "\t Shell Return Values:\n",
        "\t 0 = Comparison was successful\n",
        "\t 1 = One or more nodal values failed the comparison\n",
        "\t 2 = One or more header values failed the comparison\n",
        "\t 3 = Apples to oranges, the files can not even be compared against\n",
        "\t one another.\n",
        "\t\n"
    };
    const size_t nLines = sizeof(usageText) / sizeof(usageText[0]);
    for (size_t n = 0; n < nLines; n++) {
        fputs(usageText[n], stdout);
    }
}
628 /*****************************************************************************/
629 /*****************************************************************************/
630 /*****************************************************************************/
631 
632 int main(int argc, char* argv[])
633 
634 /*
635  * main driver for csvdiff.
636  */
637 {
638  int opt_let;
639  char* fileName1=NULL, *fileName2=NULL; /* Names of the csv files */
640  FILE* fp1=NULL, *fp2=NULL;
641  int nTitleLines1 = 0, nTitleLines2 = 0;
642  int nColTitleLines1 = 0, nColTitleLines2 = 0;
643  int nCol1 = 0, nCol2 = 0, nColMAX = 0, nColcomparisons = 0;
644  int nDataRows1 = 0, nDataRows2 = 0;
645  char** title1 = 0, **title2 = 0;
646  int** compColList = NULL;
647  char** *ColMLNames1 = NULL, *** ColMLNames2 = NULL;
648  char** ColNames1 = NULL, **ColNames2 = NULL;
649  double** NVValues1 = NULL, **NVValues2 = NULL;
650  char** *NSValues1 = NULL, *** NSValues2 = NULL;
651  int* ColIsFloat1 = NULL, *ColIsFloat2 = NULL;
652  double* curVarValues1 = NULL, *curVarValues2 = NULL;
653  char** curStringValues1 = NULL, **curStringValues2 = NULL;
654  int i, j, ndiff, jmax, i1, i2, k;
655  bool found;
656  double max_diff, rel_diff;
657  int testPassed = RT_PASSED;
658  double atol_j, atol_arg = 0.0, rtol_arg = 0.0;
659 
660  /********************** BEGIN EXECUTION ************************************/
661  int id = 0;
662  int id2 = 0;
663  char* ggg = 0;
664  char* rrr = 0;
665  /*
666  * Interpret command line arguments
667  */
668  /* Loop over each command line option */
669  while ((opt_let = getopt(argc, argv, "ha:r:")) != EOF) {
670 
671  /* case over the option letter */
672  switch (opt_let) {
673 
674  case 'h':
675  /* Usage info was requested */
676  print_usage();
677  exit(0);
678 
679  case 'a':
680  /* atol parameter */
681 
682  ggg = optarg;
683  //printf("a = %s\n", ggg);
684  id = sscanf(ggg,"%lg", &atol_arg);
685  if (id != 1) {
686  printf(" atol param bad: %s\n", ggg);
687  exit(-1);
688  }
689  gatol = atol_arg;
690  break;
691 
692  case 'r':
693  /* rtol parameter */
694 
695  rrr = optarg;
696  //printf("r = %s\n", ggg);
697  id2 = sscanf(rrr,"%lg", &rtol_arg);
698  if (id2 != 1) {
699  printf(" rtol param bad: %s\n", rrr);
700  exit(-1);
701  }
702  grtol = rtol_arg;
703  break;
704 
705 
706  default:
707  /* Default case. Error on unknown argument. */
708  printf("default called opt_let = %c\n", opt_let);
709  fprintf(stderr, "ERROR in command line usuage:\n");
710  print_usage();
711  return 0;
712  } /* End "switch(opt_let)" */
713 
714  } /* End "while((opt_let=getopt(argc, argv, "i")) != EOF)" */
715 
716  if (optind != argc-2) {
717  print_usage();
718  exit(-1);
719  } else {
720  fileName1 = argv[argc-2];
721  fileName2 = argv[argc-1];
722  }
723 
724  /*
725  * Print Out Header
726  */
727  printf("\n");
728  printf("----------------------------------------------------------\n");
729  printf("csvdiff: CSVFile comparison utility program\n");
730  printf(" Harry K. Moffat Div. 9114 Sandia National Labs\n");
731  printf(" \n");
732  printf(" First CSV File = %s\n", fileName1);
733  printf(" Second CSV file = %s\n", fileName2);
734  printf("\n");
735  printf(" Absolute tol = %g\n", gatol);
736  printf(" Relative tol = %g\n", grtol);
737  printf("----------------------------------------------------------\n");
738  printf("\n");
739 
740  /*
741  * Open up the two ascii Files #1 and #2
742  */
743  if (!(fp1 = fopen(fileName1, "r"))) {
744  fprintf(stderr,"Error opening up file1, %s\n", fileName1);
745  exit(-1);
746  }
747  if (!(fp2 = fopen(fileName2, "r"))) {
748  fprintf(stderr, "Error opening up file2, %s\n", fileName2);
749  exit(-1);
750  }
751 
752  ColIsFloat1 = mdp_alloc_int_1(200, 0);
753  ColIsFloat2 = mdp_alloc_int_1(200, 0);
754  /*
755  * Obtain the size of the problem information: Compare between files.
756  */
757 
758  get_sizes(fp1, nTitleLines1, nColTitleLines1, nCol1, nDataRows1, &ColIsFloat1);
759  if (nCol1 == 0) {
760  printf("Number of columns in file %s is zero\n", fileName1);
761  testPassed = RT_FAILED_OTHER;
762  exit(RT_FAILED_OTHER);
763  }
764  if (nDataRows1 == 0) {
765  printf("Number of data rows in file %s is zero\n", fileName1);
766  testPassed = RT_FAILED_OTHER;
767  exit(RT_FAILED_OTHER);
768  }
769 
770  get_sizes(fp2, nTitleLines2, nColTitleLines2, nCol2, nDataRows2, &ColIsFloat2);
771  if (nCol2 == 0) {
772  printf("Number of columns in file %s is zero\n", fileName2);
773  testPassed = RT_FAILED_OTHER;
774  exit(RT_FAILED_OTHER);
775  }
776  if (nDataRows2 == 0) {
777  printf("Number of data rows in file %s is zero\n", fileName2);
778  testPassed = RT_FAILED_OTHER;
779  exit(RT_FAILED_OTHER);
780  }
781 
782  if (nTitleLines1 != nTitleLines2) {
783  printf("Number of Title Lines differ:, %d %d\n",nTitleLines1, nTitleLines2);
784  testPassed = RT_FAILED_OTHER;
785  } else if (Debug_Flag) {
786  printf("Number of Title Lines in each file = %d\n", nTitleLines1);
787  }
788  if (nColTitleLines1 != nColTitleLines2) {
789  printf("Number of Column title lines differ:, %d %d\n", nColTitleLines1,
790  nColTitleLines2);
791  testPassed = RT_FAILED_OTHER;
792  } else if (Debug_Flag) {
793  printf("Number of column title lines in each file = %d\n", nColTitleLines1);
794  }
795 
796  /*
797  * Right now, if the number of data rows differ, we will punt.
798  * Maybe later we can do something more significant
799  */
800  int nDataRowsMIN = MIN(nDataRows1, nDataRows2);
801  int nDataRowsMAX = MAX(nDataRows1, nDataRows2);
802  if (nDataRows1 != nDataRows2) {
803  printf("Number of Data rows in file1, %d, is different than file2, %d\n",
804  nDataRows1, nDataRows2);
805  } else {
806  printf("Number of Data rows in both files = %d\n", nDataRowsMIN);
807  }
808 
809  rewind(fp1);
810  rewind(fp2);
811  read_title(fp1, &title1, nTitleLines1);
812  read_title(fp2, &title2, nTitleLines2);
813 
814  if (nTitleLines1 > 0 || nTitleLines2 > 0) {
815  int n = MIN(nTitleLines1, nTitleLines2);
816  for (i = 0; i < n; i++) {
817  if (strcmp(title1[i], title2[i]) != 0) {
818  printf("Title Line %d differ:\n\t\"%s\"\n\t\"%s\"\n", i, title1[i], title2[i]);
819  testPassed = RT_FAILED_HDR;
820  } else if (Debug_Flag) {
821  printf("Title Line %d for each file: \"%s\"\n", i, title1[i]);
822  }
823  }
824  if (nTitleLines1 != nTitleLines2) {
825  printf("Number of Title Lines differ: %d %d\n", nTitleLines1, nTitleLines2);
826  testPassed = RT_FAILED_HDR;
827  }
828  } else {
829  if (nTitleLines1 != nTitleLines2) {
830  if (nTitleLines1) {
831  printf("Titles differ: title for first file: \"%s\"\n",
832  title1[0]);
833  testPassed = RT_FAILED_HDR;
834  }
835  if (nTitleLines2) {
836  printf("Titles differ: title for second file: \"%s\"\n",
837  title2[0]);
838  }
839  testPassed = RT_FAILED_HDR;
840  }
841  }
842 
843  /*
844  * Get the number of column variables in each file
845  */
846 
847  if (nCol1 != nCol2) {
848  printf("Number of column variables differ:, %d %d\n",
849  nCol1, nCol2);
850  testPassed = RT_FAILED_OTHER;
851  } else if (Debug_Flag) {
852  printf("Number of column variables in both files = %d\n",
853  nCol1);
854  }
855 
856  /*
857  * Read the names of the column variables
858  */
859  read_colTitle(fp1, &ColMLNames1, nColTitleLines1, nCol1);
860  read_colTitle(fp2, &ColMLNames2, nColTitleLines2, nCol2);
861  ColNames1 = ColMLNames1[0];
862  ColNames2 = ColMLNames2[0];
863 
864  /*
865  * Do a Comparison of the names to find the maximum number
866  * of matches.
867  */
868  nColMAX = MAX(nCol1, nCol2);
869 
870  compColList = mdp_alloc_int_2(nColMAX, 2, -1);
871  nColcomparisons = 0;
872  for (i = 0; i < nCol1; i++) {
873  found = false;
874  for (j = 0; j < nCol2; j++) {
875  if (!strcmp(ColNames1[i], ColNames2[j])) {
876  compColList[nColcomparisons][0] = i;
877  compColList[nColcomparisons][1] = j;
878  nColcomparisons++;
879  found = true;
880  break;
881  }
882  }
883  if (!found) {
884  printf("csvdiff WARNING Variable %s (%d) in first file not found"
885  " in second file\n", ColNames1[i], i);
886  testPassed = RT_FAILED_OTHER;
887  }
888  }
889  for (j = 0; j < nCol2; j++) {
890  found = false;
891  for (i = 0; i < nColcomparisons; i++) {
892  if (compColList[i][1] == j) {
893  found = true;
894  }
895  }
896  if (! found) {
897  printf("csvdiff WARNING Variable %s (%d) in second file "
898  "not found in first file\n",
899  ColNames2[j], j);
900  testPassed = RT_FAILED_OTHER;
901  }
902  }
903 
904  /*
905  * Allocate storage for the column variables
906  */
907  NVValues1 = mdp_alloc_dbl_2(nCol1, nDataRowsMAX, 0.0);
908  NVValues2 = mdp_alloc_dbl_2(nCol2, nDataRowsMAX, 0.0);
909 
910  /*
911  * Allocate storage for the column variables
912  */
913  NSValues1 = (char***) mdp_alloc_ptr_2(nCol1, nDataRowsMAX);
914  NSValues2 = (char***) mdp_alloc_ptr_2(nCol2, nDataRowsMAX);
915 
916  /*
917  * Read in the values to the arrays
918  */
919  read_values(fp1, NVValues1, NSValues1, nCol1, nDataRows1, ColIsFloat1);
920  read_values(fp2, NVValues2, NSValues2, nCol2, nDataRows2, ColIsFloat2);
921 
922  /*
923  * Compare the solutions in each file
924  */
925  int method = 1;
926  double slope1, slope2, xatol;
927  int notOK;
928  for (k = 0; k < nColcomparisons; k++) {
929 
930  i1 = compColList[k][0];
931  i2 = compColList[k][1];
932  bool doFltComparison = true;
933  if (!ColIsFloat1[i1]) {
934  doFltComparison = false;
935  jmax = -1;
936  }
937  if (!ColIsFloat2[i2]) {
938  doFltComparison = false;
939  jmax = -1;
940  }
941  curStringValues1 = NSValues1[i1];
942  curStringValues2 = NSValues2[i2];
943  max_diff = 0.0;
944  ndiff = 0;
945  if (doFltComparison) {
946  curVarValues1 = NVValues1[i1];
947  curVarValues2 = NVValues2[i2];
948  atol_j = get_atol(curVarValues1, nDataRows1, gatol);
949  atol_j = MIN(atol_j, get_atol(curVarValues2, nDataRows2, gatol));
950  for (j = 0; j < nDataRowsMIN; j++) {
951 
952  slope1 = 0.0;
953  slope2 = 0.0;
954  xatol = fabs(grtol * (NVValues1[0][j] - NVValues1[0][j-1]));
955  if (j > 0 && k > 0) {
956  slope1 = (curVarValues1[j] - curVarValues1[j-1])/
957  (NVValues1[0][j] - NVValues1[0][j-1]);
958  slope2 = (curVarValues2[j] - curVarValues2[j-1])/
959  (NVValues2[0][j] - NVValues2[0][j-1]);
960  }
961  if (method) {
962  notOK = diff_double_slope(curVarValues1[j], curVarValues2[j],
963  grtol, atol_j, xatol, slope1, slope2);
964  } else {
965  notOK = diff_double(curVarValues1[j], curVarValues2[j],
966  grtol, atol_j);
967  }
968  if (notOK) {
969  ndiff++;
970  rel_diff = calc_rdiff((double) curVarValues1[j],
971  (double) curVarValues2[j], grtol, atol_j);
972  if (rel_diff > max_diff) {
973  jmax = j;
974  max_diff = rel_diff;
975  }
976  if (ndiff < 10) {
977  printf("\tColumn variable %s at data row %d ", ColNames1[i1], j + 1);
978  printf(" differ: %g %g\n", curVarValues1[j],
979  curVarValues2[j]);
980  }
981  }
982  }
983  } else {
984  for (j = 0; j < nDataRowsMIN; j++) {
985  strip(curStringValues1[j]);
986  strip(curStringValues2[j]);
987  notOK = false;
988  if (strcmp(curStringValues1[j], curStringValues2[j])) {
989  notOK = true;
990  ndiff++;
991  if (ndiff < 10) {
992  printf("\tColumn String variable %s at data row %d ", ColNames1[i1], j + 1);
993  printf(" differ: %s %s\n", curStringValues1[j],
994  curStringValues2[j]);
995  }
996  }
997  }
998  }
999 
1000  if (nDataRowsMIN != nDataRowsMAX) {
1001  ndiff += nDataRowsMAX - nDataRowsMIN;
1002  if (ndiff < 10) {
1003  if (nDataRows1 > nDataRows2) {
1004  for (j = nDataRowsMIN; j < nDataRowsMAX; j++) {
1005  printf("\tColumn variable %s at data row %d ", ColNames1[i1], j + 1);
1006  printf(" differ: %g NA\n", curVarValues1[j]);
1007  }
1008  } else {
1009  for (j = nDataRowsMIN; j < nDataRowsMAX; j++) {
1010  printf("\tColumn variable %s at data row %d ", ColNames1[i1], j + 1);
1011  printf(" differ: NA %g \n", curVarValues2[j]);
1012  }
1013  }
1014  }
1015  }
1016 
1017  /*
1018  * Print out final results of nodal variable test
1019  */
1020 
1021  if (ndiff > 0) {
1022  printf(
1023  "Column variable %s failed comparison test for %d occurrences\n",
1024  ColNames1[i1], ndiff);
1025  if (jmax >= 0) {
1026  printf(" Largest difference was at data row %d ", jmax + 1);
1027  printf(": %g %g\n", curVarValues1[jmax], curVarValues2[jmax]);
1028  }
1029  testPassed = RT_FAILED_COL;
1030  } else if (Debug_Flag) {
1031  printf("Column variable %s passed\n", ColNames1[i1]);
1032  }
1033 
1034  }
1035 
1036  return(testPassed);
1037 
1038 } /************END of main() *************************************************/
1039 /*****************************************************************************/