29 #include "cantera/base/config.h"
37 #if defined(__CYGWIN__)
43 #include "tok_input_util.h"
// Function-like max/min helpers. Each argument appears twice in the
// expansion, so an argument with side effects or an expensive call is
// evaluated more than once.
45 # define MAX(x,y) (( (x) > (y) ) ? (x) : (y))
48 # define MIN(x,y) (( (x) < (y) ) ? (x) : (y))
// Gates the informational printf arms in main (the else-if Debug_Flag branches).
52 int Debug_Flag =
true;
// Global relative and absolute tolerances used by the column comparison in
// main. The initial values match the defaults advertised by print_usage.
53 double grtol = 1.0E-3;
54 double gatol = 1.0E-9;
// Failure codes stored in testPassed and handed to exit() in main. The
// numeric values line up with the shell return values 1, 2 and 3 that
// print_usage lists.
57 #define RT_FAILED_COL 1
58 #define RT_FAILED_HDR 2
59 #define RT_FAILED_OTHER 3
// Runtime copy of MAX_INPUT_STR_LN. The read_title, read_colTitle and
// read_values helpers size their line buffers as Max_Input_Str_Ln + 1.
64 int Max_Input_Str_Ln = MAX_INPUT_STR_LN;
// Local getopt stand-in that walks argv using state kept in function-local
// statics, so the scan position persists from one call to the next.
// The third parameter is unnamed, so the option specification string the
// caller passes is never consulted by this implementation.
// NOTE(review): only part of the body is visible in this view. The return
// values and any update of optind are not shown - confirm against the full file.
77 int getopt(
int argc,
char** argv,
const char*)
// Index of the argv entry examined by the current call; starts after argv[0].
79 static int currArg = 1;
80 static int currOptInd = 1;
// Character offset inside the current token - presumably for bundled option
// letters; TODO confirm.
82 static int charPos = 0;
// No argument left to examine.
84 if (currArg >= argc) {
88 tok = string(argv[currArg]);
89 currOptInd = currArg+1;
// currOptInd past the last valid index means there is no following argv entry.
90 if (currOptInd > argc - 1) {
// optarg is pointed at the argv entry that follows the current token.
94 optarg = argv[currArg+1];
96 size_t len = strlen(tok.c_str());
100 tok = string(argv[currArg]);
101 len = strlen(tok.c_str());
// A token of at least two characters that begins with a dash.
102 if (len > 1 && tok[0] ==
'-') {
// A second dash at position 1 on a token longer than two characters.
105 if (len > 2 && tok[1] ==
'-') {
// True when at least one more argv entry follows the current one.
114 if (currArg < (argc-1)) {
// True while charPos has not reached the last character of the token.
125 if (charPos < static_cast<int>(len - 1)) {
// Tolerance test for two doubles. The visible condition fires when
// |d1 - d2| exceeds atol + rtol * (mean of |d1| and |d2|).
// NOTE(review): the return statements are outside this view. main stores the
// result in a variable named notOK, so a nonzero return presumably means the
// pair is out of tolerance - confirm.
139 static int diff_double(
double d1,
double d2,
double rtol,
double atol)
146 if (fabs(d1-d2) > (atol + rtol * 0.5 * (fabs(d1) + fabs(d2)))) {
// Variant of diff_double that widens the allowed absolute difference by a
// slope-dependent term: atol2 = xtol * (|slope1| + |slope2|).
// The visible condition fires when |d1 - d2| exceeds
// atol + atol2 + rtol * (mean of |d1| and |d2|).
// NOTE(review): return statements are not visible here; main stores the
// result in notOK, so nonzero presumably means out of tolerance - confirm.
152 static int diff_double_slope(
double d1,
double d2,
double rtol,
153 double atol,
double xtol,
double slope1,
double slope2)
// Extra absolute slack proportional to the summed slope magnitudes.
160 double atol2 = xtol*(fabs(slope1) + fabs(slope2));
161 if (fabs(d1-d2) > (atol + atol2 + rtol * 0.5 * (fabs(d1) + fabs(d2)))) {
// Computes a difference measure for d1 and d2; main compares the result
// against its running max_diff.
// The only visible statement builds lhs, the same tolerance scale used by
// diff_double: atol + rtol * (mean of |d1| and |d2|).
// NOTE(review): how lhs enters the returned value is not visible in this
// view - presumably |d1 - d2| is normalised by it; confirm.
171 static double calc_rdiff(
double d1,
double d2,
double rtol,
double atol)
180 lhs = atol + rtol * 0.5 * (fabs(d1) + fabs(d2));
// Splits str at commas, recording the start of each piece in strlets.
//   str       - line to split
//   strlets   - receives one pointer per piece; entries point into str
//   maxPieces - the loop stops once numbreaks reaches maxPieces - 1
// Returns numbreaks + 1.
// NOTE(review): whether each comma is overwritten with a terminator is not
// visible in this view - confirm before relying on the pieces being
// independent strings.
203 static int breakStrCommas(
char* str,
char** strlets,
int maxPieces)
// Locate the next comma at or after cptr.
212 cetn = strchr(cptr, (
int)
',');
216 strlets[numbreaks] = cptr;
219 }
while (cetn && (numbreaks < (maxPieces - 1)));
222 return numbreaks + 1;
// Line-type states held in doingLineType by get_sizes while it classifies
// the lines of a file: title line, column-title line, data line, or the
// null state it switches to after a line fails the data-line checks.
228 #define LT_NULLLINE 0
229 #define LT_TITLELINE 1
230 #define LT_COLTITLE 2
231 #define LT_DATALINE 3
// Scans a CSV file to work out its layout and reports it through the
// reference parameters.
//   fp             - file to scan
//   nTitleLines    - read below when computing the other counts; its
//                    assignment is not visible in this view
//   nColTitleLines - set to i - nTitleLines when the column-title scan ends
//   nCol           - set to the largest comma count seen plus one
//   nDataRows      - set from the row index where the data scan stops
//   ColIsFloat_ptr - points at a caller-allocated int array; entry j is set
//                    to true for column j in the second pass
// The visible code makes several read_line loops over the file.
// NOTE(review): presumably the file is rewound between passes; that is not
// visible here - confirm.
251 static void get_sizes(FILE* fp,
int& nTitleLines,
int& nColTitleLines,
252 int& nCol,
int& nDataRows,
int** ColIsFloat_ptr)
// Upper bound on how many leading lines are examined to infer the layout.
254 int nScanLinesMAX = 100;
255 int nScanLines = nScanLinesMAX;
259 char* scanLine = mdp_alloc_char_1(MAX_INPUT_STR_LN+1,
'\0');
// One comma count per scanned line.
260 int* numCommas = mdp_alloc_int_1(nScanLinesMAX, -1);
// NOTE(review): the array is supplied by the caller and indexed by column
// below with no visible bound check - confirm its length covers nCol.
261 int* ColIsFloat = *ColIsFloat_ptr;
// Pass 1: count commas per line and track the maximum.
270 for (i = 0; i < nScanLinesMAX; i++) {
271 retn = read_line(fp, scanLine, 0);
280 int ccount =
static_cast<int>(strlen(scanLine));
// Drop a single trailing comma so it does not count as an extra column.
// NOTE(review): index ccount-1 is invalid for an empty line; a guard may sit
// on a line not shown here - confirm.
282 if (scanLine[ccount-1] ==
',') {
283 scanLine[ccount-1] =
'\0';
289 char* cptr = scanLine;
293 cetn = strchr(cptr, (
int)
',');
300 if (maxCommas < numCommas[i]) {
301 maxCommas = numCommas[i];
// Column count is one more than the largest comma count.
308 nCol = maxCommas + 1;
309 if (nScanLines == 0) {
312 char** strlets = (
char**) mdp_alloc_ptr_1(maxCommas+1);
// Pass 2: on lines that have the full column count, test each field and
// flag columns in ColIsFloat.
318 for (i = 0; i < nScanLines; i++) {
319 retn = read_line(fp, scanLine, 0);
320 int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
321 if (ncolsFound == (maxCommas + 1)) {
322 for (j = 0; j < ncolsFound; j++) {
323 char* fieldStr = strlets[j];
324 fillTokStruct(&fieldToken, fieldStr);
// The field is checked for holding exactly one token.
325 if (fieldToken.ntokes != 1) {
// Conversion attempt; the value is discarded and rerr receives the status.
329 (void) tok_to_double(&fieldToken, DBL_MAX,
330 -DBL_MAX, 0.0, &rerr);
332 ColIsFloat[j] =
true;
// Pass 3: classify lines, starting in the title state.
341 int doingLineType = LT_TITLELINE;
343 for (i = 0; i < nScanLines; i++) {
344 retn = read_line(fp, scanLine, 0);
349 int ccount =
static_cast<int>(strlen(scanLine));
351 if (scanLine[ccount-1] ==
',') {
352 scanLine[ccount-1] =
'\0';
355 int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
// The first line with the full comma count ends the title section.
357 if (doingLineType == LT_TITLELINE) {
358 if (numCommas[i] == maxCommas) {
359 doingLineType = LT_COLTITLE;
// While in the column-title state, test whether this line already parses as
// a data line.
364 if (doingLineType == LT_COLTITLE) {
365 bool goodDataLine =
true;
367 for (j = 0; j < ncolsFound; j++) {
368 char* fieldStr = strlets[j];
369 fillTokStruct(&fieldToken, fieldStr);
370 if (fieldToken.ntokes != 1) {
371 goodDataLine =
false;
// Only columns flagged as float are checked for numeric conversion.
374 if ((ColIsFloat[j]) == 1) {
375 (void) tok_to_double(&fieldToken, DBL_MAX,
376 -DBL_MAX, 0.0, &rerr);
378 goodDataLine =
false;
384 doingLineType = LT_DATALINE;
// Column-title lines are those between the title lines and line i.
386 nColTitleLines = i - nTitleLines;
388 if (doingLineType == LT_DATALINE) {
// Data scan: continue from the first data line with no fixed upper bound on i.
397 if (doingLineType == LT_DATALINE) {
398 for (i = nColTitleLines + nTitleLines; ; i++) {
399 retn = read_line(fp, scanLine, 0);
401 nDataRows = i - nColTitleLines - nTitleLines + 1;
408 int ccount =
static_cast<int>(strlen(scanLine));
410 if (scanLine[ccount-1] ==
',') {
411 scanLine[ccount-1] =
'\0';
414 int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
415 bool goodDataLine =
true;
417 for (j = 0; j < ncolsFound; j++) {
418 char* fieldStr = strlets[j];
419 fillTokStruct(&fieldToken, fieldStr);
420 if (fieldToken.ntokes != 1) {
421 goodDataLine =
false;
424 if (ColIsFloat[j] == 1) {
425 (void) tok_to_double(&fieldToken, DBL_MAX,
426 -DBL_MAX, 0.0, &rerr);
428 goodDataLine =
false;
// A line that fails the checks switches to the null state and fixes the row count.
433 if (! goodDataLine) {
434 doingLineType = LT_NULLLINE;
435 nDataRows = i - nColTitleLines - nTitleLines + 1;
// Release the scratch buffers allocated above.
440 mdp_safe_free((
void**) &strlets);
441 mdp_safe_free((
void**) &scanLine);
442 mdp_safe_free((
void**) &numCommas);
// Reads nTitleLines lines from fp and stores a copy of each in a newly
// allocated array.
//   fp          - file positioned at the first title line
//   title       - out: *title is set to the allocated array of nTitleLines
//                 pointers; entry i is an mdp_copy_string copy of line i
//   nTitleLines - number of lines to read
// NOTE(review): nothing visible here frees *title or its entries, so the
// caller presumably owns them - confirm.
451 read_title(FILE* fp,
char** *title,
int nTitleLines)
454 *title = (
char**) mdp_alloc_ptr_1(nTitleLines);
455 char* scanLine = mdp_alloc_char_1(Max_Input_Str_Ln + 1,
'\0');
456 for (
int i = 0; i < nTitleLines ; i++) {
457 retn = read_line(fp, scanLine, 0);
463 int ccount =
static_cast<int>(strlen(scanLine));
// Strip a single trailing comma before copying the line.
// NOTE(review): index ccount-1 is invalid for an empty line; a guard may sit
// on a line not shown here - confirm.
465 if (scanLine[ccount-1] ==
',') {
466 scanLine[ccount-1] =
'\0';
469 (*title)[i] = mdp_copy_string(scanLine);
// Only the scratch line buffer is released here.
472 mdp_safe_free((
void**) &scanLine);
// Reads the column-title lines and stores the per-column names.
//   fp             - file positioned at the first column-title line
//   ColMLNames_ptr - out: *ColMLNames_ptr is set to a newly allocated array;
//                    entry [i][j] holds the name of column j on title line i
//   nColTitleLines - number of column-title lines to read
//   nCol           - number of columns
// A second visible path fills ColMLNames[0] with generated names of the form
// Col_1, Col_2 and so on - presumably the fallback when there are no
// column-title lines; the branch keyword is not visible in this view.
480 read_colTitle(FILE* fp,
char**** ColMLNames_ptr,
int nColTitleLines,
int nCol)
// NOTE(review): the outer array is sized by nCol but indexed below by
// title-line number i - confirm nColTitleLines can never exceed nCol.
483 *ColMLNames_ptr = (
char***) mdp_alloc_ptr_1(nCol);
484 char** *ColMLNames = *ColMLNames_ptr;
485 char* scanLine = mdp_alloc_char_1(Max_Input_Str_Ln + 1,
'\0');
486 char** strlets = (
char**) mdp_alloc_ptr_1(nCol+1);
487 if (nColTitleLines > 0) {
488 for (
int i = 0; i < nColTitleLines ; i++) {
489 retn = read_line(fp, scanLine, 0);
495 int ccount =
static_cast<int>(strlen(scanLine));
// Strip a single trailing comma before splitting.
497 if (scanLine[ccount-1] ==
',') {
498 scanLine[ccount-1] =
'\0';
501 int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
502 ColMLNames[i] = mdp_alloc_VecFixedStrings(nCol, MAX_TOKEN_STR_LN+1);
503 for (j = 0; j < ncolsFound; j++) {
// NOTE(review): strcpy with no visible length check against the
// MAX_TOKEN_STR_LN+1 size requested above - confirm field lengths are bounded.
505 strcpy(ColMLNames[i][j], strlets[j]);
510 ColMLNames[0] = mdp_alloc_VecFixedStrings(nCol, MAX_TOKEN_STR_LN+1);
511 for (j = 0; j < nCol; j++) {
// Generated names are numbered from 1.
513 sprintf(cbuff,
"Col_%d", j+1);
514 strcpy(ColMLNames[0][j], cbuff);
// Release the scratch buffers; the name arrays stay allocated for the caller.
517 mdp_safe_free((
void**) &scanLine);
518 mdp_safe_free((
void**) &strlets);
// Derives a per-column absolute tolerance from a column of values.
//   values - column data
//   nvals  - number of entries in values
// The result is (retn + 1.0) * atol, so it scales atol upward by a factor
// computed in the loop over the values.
// NOTE(review): the third parameter and the loop body are not visible in
// this view. main passes gatol as the third argument, and retn is presumably
// a magnitude measure built from sum - confirm.
525 static double get_atol(
const double* values,
const int nvals,
529 double sum = 0.0, retn;
533 for (i = 0; i < nvals; i++) {
539 return ((retn + 1.0) * atol);
// Reads nDataRows data lines from fp and stores every field both as text
// and as a number.
//   fp        - file positioned at the first data line
//   NVValues  - numeric results, indexed [column][row]
//   NSValues  - mdp_copy_string copies of the raw fields, indexed [column][row]
//   nCol      - number of columns; also the piece limit passed to breakStrCommas
//   nDataRows - number of lines to read
// NOTE(review): the last parameter is not visible in this view. main passes
// its ColIsFloat array there, so the numeric conversion is presumably
// limited to float columns - confirm.
547 read_values(FILE* fp,
double** NVValues,
char** *NSValues,
int nCol,
int nDataRows,
550 char** strlets = (
char**) mdp_alloc_ptr_1(nCol+1);
551 char* scanLine = mdp_alloc_char_1(Max_Input_Str_Ln + 1,
'\0');
555 for (
int i = 0; i < nDataRows; i++) {
556 retn = read_line(fp, scanLine, 0);
564 int ccount =
static_cast<int>(strlen(scanLine));
// Strip a single trailing comma before splitting.
566 if (scanLine[ccount-1] ==
',') {
567 scanLine[ccount-1] =
'\0';
570 int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
571 bool goodDataLine =
true;
573 for (j = 0; j < ncolsFound; j++) {
574 char* fieldStr = strlets[j];
// Keep the raw text of the field for string comparison in main.
575 NSValues[j][i] = mdp_copy_string(strlets[j]);
576 fillTokStruct(&fieldToken, fieldStr);
// The field is checked for holding exactly one token.
577 if (fieldToken.ntokes != 1) {
578 goodDataLine =
false;
582 value = tok_to_double(&fieldToken, DBL_MAX,
583 -DBL_MAX, 0.0, &rerr);
585 goodDataLine =
false;
588 NVValues[j][i] = value;
591 if (! goodDataLine) {
// Release the scratch buffers; the copied field strings stay allocated.
595 mdp_safe_free((
void**) &strlets);
596 mdp_safe_free((
void**) &scanLine);
// Prints the command-line synopsis, a short description of the comparison,
// the supported options with their default tolerances, and the meaning of
// each shell return value to stdout.
602 static void print_usage()
// Synopsis line.
605 printf(
" csvdiff [-h] [-a atol] [-r rtol] File1.csv File2.csv\n");
// Description of what is compared and which file takes precedence.
607 printf(
"\tCompares the variable values in two Excel formatted "
608 "comma separated files.\n");
609 printf(
"\tThe comparison is done using a weighted norm basis.\n");
611 printf(
"\tThe two files should be basically equal. However, File1.csv is\n");
612 printf(
"\ttaken as the reference file that has precedence, when there is\n");
613 printf(
"\tsomething to be decided upon.\n");
// Option summary; the defaults quoted here match the initial values of
// gatol and grtol.
615 printf(
"\t Arguments:\n");
616 printf(
"\t -h = Usage info\n");
617 printf(
"\t -a atol = Set absolute tolerance parameter - default = 1.0E-9\n");
618 printf(
"\t -r rtol = Set relative tolerance parameter - default = 1.0E-3\n");
// Exit status table; values 1 to 3 correspond to the RT_FAILED_* macros.
620 printf(
"\t Shell Return Values:\n");
621 printf(
"\t 0 = Comparison was successful\n");
622 printf(
"\t 1 = One or more nodal values failed the comparison\n");
623 printf(
"\t 2 = One or more header values failed the comparison\n");
624 printf(
"\t 3 = Apples to oranges, the files can not even be compared against\n");
625 printf(
"\t one another.\n");
// Program entry point: parses the options, opens the two CSV files, sizes
// and reads them, then compares titles, column names and column values.
// The outcome is tracked in testPassed, which starts at RT_PASSED and is
// overwritten with an RT_FAILED_* code by plain assignment whenever a check
// fails, so a later failure replaces an earlier code.
// NOTE(review): many lines of this function, including its final return,
// are not visible in this view.
632 int main(
int argc,
char* argv[])
639 char* fileName1=NULL, *fileName2=NULL;
640 FILE* fp1=NULL, *fp2=NULL;
641 int nTitleLines1 = 0, nTitleLines2 = 0;
642 int nColTitleLines1 = 0, nColTitleLines2 = 0;
643 int nCol1 = 0, nCol2 = 0, nColMAX = 0, nColcomparisons = 0;
644 int nDataRows1 = 0, nDataRows2 = 0;
645 char** title1 = 0, **title2 = 0;
// Pairs of matching column indices: [k][0] in file 1, [k][1] in file 2.
646 int** compColList = NULL;
647 char** *ColMLNames1 = NULL, *** ColMLNames2 = NULL;
648 char** ColNames1 = NULL, **ColNames2 = NULL;
// Numeric and string copies of the data, indexed [column][row].
649 double** NVValues1 = NULL, **NVValues2 = NULL;
650 char** *NSValues1 = NULL, *** NSValues2 = NULL;
651 int* ColIsFloat1 = NULL, *ColIsFloat2 = NULL;
652 double* curVarValues1 = NULL, *curVarValues2 = NULL;
653 char** curStringValues1 = NULL, **curStringValues2 = NULL;
654 int i, j, ndiff, jmax, i1, i2, k;
656 double max_diff, rel_diff;
657 int testPassed = RT_PASSED;
658 double atol_j, atol_arg = 0.0, rtol_arg = 0.0;
// Option loop: h takes no argument, a and r each take one.
669 while ((opt_let = getopt(argc, argv,
"ha:r:")) != EOF) {
// Parse the absolute tolerance argument.
// NOTE(review): the transfer of atol_arg and rtol_arg into gatol and grtol
// is not visible in this view - confirm.
684 id = sscanf(ggg,
"%lg", &atol_arg);
686 printf(
" atol param bad: %s\n", ggg);
// Parse the relative tolerance argument.
697 id2 = sscanf(rrr,
"%lg", &rtol_arg);
699 printf(
" rtol param bad: %s\n", rrr);
708 printf(
"default called opt_let = %c\n", opt_let);
709 fprintf(stderr,
"ERROR in command line usuage:\n");
// Exactly two positional arguments must remain: the two file names, taken
// from the last two argv entries.
716 if (optind != argc-2) {
720 fileName1 = argv[argc-2];
721 fileName2 = argv[argc-1];
// Banner describing the run.
728 printf(
"----------------------------------------------------------\n");
729 printf(
"csvdiff: CSVFile comparison utility program\n");
730 printf(
" Harry K. Moffat Div. 9114 Sandia National Labs\n");
732 printf(
" First CSV File = %s\n", fileName1);
733 printf(
" Second CSV file = %s\n", fileName2);
735 printf(
" Absolute tol = %g\n", gatol);
736 printf(
" Relative tol = %g\n", grtol);
737 printf(
"----------------------------------------------------------\n");
// Open both files for reading; failures are reported on stderr.
743 if (!(fp1 = fopen(fileName1,
"r"))) {
744 fprintf(stderr,
"Error opening up file1, %s\n", fileName1);
747 if (!(fp2 = fopen(fileName2,
"r"))) {
748 fprintf(stderr,
"Error opening up file2, %s\n", fileName2);
// Per-column float flags, filled in by get_sizes.
// NOTE(review): presumably 200 entries initialised to 0; no visible check
// keeps the column count within that size - confirm.
752 ColIsFloat1 = mdp_alloc_int_1(200, 0);
753 ColIsFloat2 = mdp_alloc_int_1(200, 0);
// Size the first file; zero columns or zero data rows exits immediately
// with RT_FAILED_OTHER.
758 get_sizes(fp1, nTitleLines1, nColTitleLines1, nCol1, nDataRows1, &ColIsFloat1);
760 printf(
"Number of columns in file %s is zero\n", fileName1);
761 testPassed = RT_FAILED_OTHER;
762 exit(RT_FAILED_OTHER);
764 if (nDataRows1 == 0) {
765 printf(
"Number of data rows in file %s is zero\n", fileName1);
766 testPassed = RT_FAILED_OTHER;
767 exit(RT_FAILED_OTHER);
// Same sizing and checks for the second file.
770 get_sizes(fp2, nTitleLines2, nColTitleLines2, nCol2, nDataRows2, &ColIsFloat2);
772 printf(
"Number of columns in file %s is zero\n", fileName2);
773 testPassed = RT_FAILED_OTHER;
774 exit(RT_FAILED_OTHER);
776 if (nDataRows2 == 0) {
777 printf(
"Number of data rows in file %s is zero\n", fileName2);
778 testPassed = RT_FAILED_OTHER;
779 exit(RT_FAILED_OTHER);
// Compare the title-line counts of the two files.
782 if (nTitleLines1 != nTitleLines2) {
783 printf(
"Number of Title Lines differ:, %d %d\n",nTitleLines1, nTitleLines2);
784 testPassed = RT_FAILED_OTHER;
785 }
else if (Debug_Flag) {
786 printf(
"Number of Title Lines in each file = %d\n", nTitleLines1);
// Compare the column-title-line counts.
788 if (nColTitleLines1 != nColTitleLines2) {
789 printf(
"Number of Column title lines differ:, %d %d\n", nColTitleLines1,
791 testPassed = RT_FAILED_OTHER;
792 }
else if (Debug_Flag) {
793 printf(
"Number of column title lines in each file = %d\n", nColTitleLines1);
// Row-by-row comparison covers the shorter file; storage is sized for the longer.
800 int nDataRowsMIN = MIN(nDataRows1, nDataRows2);
801 int nDataRowsMAX = MAX(nDataRows1, nDataRows2);
802 if (nDataRows1 != nDataRows2) {
803 printf(
"Number of Data rows in file1, %d, is different than file2, %d\n",
804 nDataRows1, nDataRows2);
806 printf(
"Number of Data rows in both files = %d\n", nDataRowsMIN);
// Read the title lines of both files and compare the ones they share.
811 read_title(fp1, &title1, nTitleLines1);
812 read_title(fp2, &title2, nTitleLines2);
814 if (nTitleLines1 > 0 || nTitleLines2 > 0) {
815 int n = MIN(nTitleLines1, nTitleLines2);
816 for (i = 0; i < n; i++) {
817 if (strcmp(title1[i], title2[i]) != 0) {
818 printf(
"Title Line %d differ:\n\t\"%s\"\n\t\"%s\"\n", i, title1[i], title2[i]);
819 testPassed = RT_FAILED_HDR;
820 }
else if (Debug_Flag) {
821 printf(
"Title Line %d for each file: \"%s\"\n", i, title1[i]);
// A differing title-line count is reported again here, this time as a
// header failure.
824 if (nTitleLines1 != nTitleLines2) {
825 printf(
"Number of Title Lines differ: %d %d\n", nTitleLines1, nTitleLines2);
826 testPassed = RT_FAILED_HDR;
829 if (nTitleLines1 != nTitleLines2) {
831 printf(
"Titles differ: title for first file: \"%s\"\n",
833 testPassed = RT_FAILED_HDR;
836 printf(
"Titles differ: title for second file: \"%s\"\n",
839 testPassed = RT_FAILED_HDR;
// Compare the column counts.
847 if (nCol1 != nCol2) {
848 printf(
"Number of column variables differ:, %d %d\n",
850 testPassed = RT_FAILED_OTHER;
851 }
else if (Debug_Flag) {
852 printf(
"Number of column variables in both files = %d\n",
// Read the column titles; the first title line supplies the column names
// used for matching.
859 read_colTitle(fp1, &ColMLNames1, nColTitleLines1, nCol1);
860 read_colTitle(fp2, &ColMLNames2, nColTitleLines2, nCol2);
861 ColNames1 = ColMLNames1[0];
862 ColNames2 = ColMLNames2[0];
868 nColMAX = MAX(nCol1, nCol2);
870 compColList = mdp_alloc_int_2(nColMAX, 2, -1);
// Match each column of file 1 to a column of file 2 with the same name.
872 for (i = 0; i < nCol1; i++) {
874 for (j = 0; j < nCol2; j++) {
875 if (!strcmp(ColNames1[i], ColNames2[j])) {
876 compColList[nColcomparisons][0] = i;
877 compColList[nColcomparisons][1] = j;
884 printf(
"csvdiff WARNING Variable %s (%d) in first file not found"
885 " in second file\n", ColNames1[i], i);
886 testPassed = RT_FAILED_OTHER;
// Report columns of file 2 that were not paired with any column of file 1.
889 for (j = 0; j < nCol2; j++) {
891 for (i = 0; i < nColcomparisons; i++) {
892 if (compColList[i][1] == j) {
897 printf(
"csvdiff WARNING Variable %s (%d) in second file "
898 "not found in first file\n",
900 testPassed = RT_FAILED_OTHER;
// Allocate numeric and string storage sized for the longer file, then read
// the data rows of both files.
907 NVValues1 = mdp_alloc_dbl_2(nCol1, nDataRowsMAX, 0.0);
908 NVValues2 = mdp_alloc_dbl_2(nCol2, nDataRowsMAX, 0.0);
913 NSValues1 = (
char***) mdp_alloc_ptr_2(nCol1, nDataRowsMAX);
914 NSValues2 = (
char***) mdp_alloc_ptr_2(nCol2, nDataRowsMAX);
919 read_values(fp1, NVValues1, NSValues1, nCol1, nDataRows1, ColIsFloat1);
920 read_values(fp2, NVValues2, NSValues2, nCol2, nDataRows2, ColIsFloat2);
926 double slope1, slope2, xatol;
// Compare every matched column pair.
928 for (k = 0; k < nColcomparisons; k++) {
930 i1 = compColList[k][0];
931 i2 = compColList[k][1];
// Numeric comparison is used only when both files flag the column as float.
932 bool doFltComparison =
true;
933 if (!ColIsFloat1[i1]) {
934 doFltComparison =
false;
937 if (!ColIsFloat2[i2]) {
938 doFltComparison =
false;
941 curStringValues1 = NSValues1[i1];
942 curStringValues2 = NSValues2[i2];
945 if (doFltComparison) {
946 curVarValues1 = NVValues1[i1];
947 curVarValues2 = NVValues2[i2];
// Column tolerance is the smaller of the two per-file values.
// NOTE(review): MIN is a macro that repeats its arguments, so the get_atol
// call in the second argument can be evaluated twice.
948 atol_j = get_atol(curVarValues1, nDataRows1, gatol);
949 atol_j = MIN(atol_j, get_atol(curVarValues2, nDataRows2, gatol));
950 for (j = 0; j < nDataRowsMIN; j++) {
// Slack derived from the step in column 0 between rows j-1 and j.
// NOTE(review): this reads row j-1 before the j > 0 check on the next
// statement - confirm j == 0 cannot reach this line, otherwise it indexes
// before the start of the row data.
954 xatol = fabs(grtol * (NVValues1[0][j] - NVValues1[0][j-1]));
// Slopes with respect to column 0, skipped for the first row and for the
// first compared column.
955 if (j > 0 && k > 0) {
956 slope1 = (curVarValues1[j] - curVarValues1[j-1])/
957 (NVValues1[0][j] - NVValues1[0][j-1]);
958 slope2 = (curVarValues2[j] - curVarValues2[j-1])/
959 (NVValues2[0][j] - NVValues2[0][j-1]);
962 notOK = diff_double_slope(curVarValues1[j], curVarValues2[j],
963 grtol, atol_j, xatol, slope1, slope2);
965 notOK = diff_double(curVarValues1[j], curVarValues2[j],
// Track the largest difference measure seen for this column.
970 rel_diff = calc_rdiff((
double) curVarValues1[j],
971 (
double) curVarValues2[j], grtol, atol_j);
972 if (rel_diff > max_diff) {
977 printf(
"\tColumn variable %s at data row %d ", ColNames1[i1], j + 1);
978 printf(
" differ: %g %g\n", curVarValues1[j],
// String comparison path: both fields are passed through strip and then
// compared with strcmp.
984 for (j = 0; j < nDataRowsMIN; j++) {
985 strip(curStringValues1[j]);
986 strip(curStringValues2[j]);
988 if (strcmp(curStringValues1[j], curStringValues2[j])) {
992 printf(
"\tColumn String variable %s at data row %d ", ColNames1[i1], j + 1);
993 printf(
" differ: %s %s\n", curStringValues1[j],
994 curStringValues2[j]);
// Rows present in only one file each count as a difference.
// NOTE(review): curVarValues1 and curVarValues2 are assigned only in the
// float branch above - confirm this report is never reached for a string
// column with a stale or null pointer.
1000 if (nDataRowsMIN != nDataRowsMAX) {
1001 ndiff += nDataRowsMAX - nDataRowsMIN;
1003 if (nDataRows1 > nDataRows2) {
1004 for (j = nDataRowsMIN; j < nDataRowsMAX; j++) {
1005 printf(
"\tColumn variable %s at data row %d ", ColNames1[i1], j + 1);
1006 printf(
" differ: %g NA\n", curVarValues1[j]);
1009 for (j = nDataRowsMIN; j < nDataRowsMAX; j++) {
1010 printf(
"\tColumn variable %s at data row %d ", ColNames1[i1], j + 1);
1011 printf(
" differ: NA %g \n", curVarValues2[j]);
// Per-column summary: failure count and the row with the largest difference.
1023 "Column variable %s failed comparison test for %d occurrences\n",
1024 ColNames1[i1], ndiff);
1026 printf(
" Largest difference was at data row %d ", jmax + 1);
1027 printf(
": %g %g\n", curVarValues1[jmax], curVarValues2[jmax]);
1029 testPassed = RT_FAILED_COL;
1030 }
else if (Debug_Flag) {
1031 printf(
"Column variable %s passed\n", ColNames1[i1]);