27 #include "cantera/base/config.h"
35 #if defined(__CYGWIN__)
41 #include "tok_input_util.h"
// When true, main() also prints informational messages for checks that pass
// (e.g. matching title-line counts), not only for mismatches.
43 int Debug_Flag =
true;
// Global relative and absolute tolerances used by the numeric comparisons in
// main(). The initial values match the defaults quoted in the usage text.
44 double grtol = 1.0E-3;
45 double gatol = 1.0E-9;
// Failure codes stored into main()'s testPassed status: a column's data failed
// the comparison, a header/title differed, or the files could not be compared
// against one another at all.
48 #define RT_FAILED_COL 1
49 #define RT_FAILED_HDR 2
50 #define RT_FAILED_OTHER 3
// Maximum input line length; the readers size their line buffers as
// Max_Input_Str_Ln + 1 characters.
55 int Max_Input_Str_Ln = MAX_INPUT_STR_LN;
// Local getopt() implementation used to walk the "-x [value]" style options
// on the command line.
//
// The third parameter (the option-specification string) is unnamed, so the
// body cannot consult it. The parsing position is held in function-local
// statics, so each call continues from where the previous call stopped.
//
// NOTE(review): only part of the body is visible here; the values returned
// and the updates to `optind` are outside the visible lines -- confirm them
// before relying on the details below.
68 int getopt(
int argc,
char** argv,
const char*)
// Persistent scan state: index of the argument being examined, index of the
// argument after it, and the character position within the current token.
70 static int currArg = 1;
71 static int currOptInd = 1;
73 static int charPos = 0;
// Nothing left on the command line to examine.
75 if (currArg >= argc) {
79 tok = string(argv[currArg]);
80 currOptInd = currArg+1;
// Check whether an argument exists after the current one before using it as
// the option's value.
81 if (currOptInd > argc - 1) {
85 optarg = argv[currArg+1];
87 size_t len = strlen(tok.c_str());
91 tok = string(argv[currArg]);
92 len = strlen(tok.c_str());
// Option tokens start with '-'; a second '-' on a longer token is tested
// separately.
93 if (len > 1 && tok[0] ==
'-') {
96 if (len > 2 && tok[1] ==
'-') {
105 if (currArg < (argc-1)) {
// More option letters remain inside the current token.
116 if (charPos < static_cast<int>(len - 1)) {
// Mixed absolute/relative tolerance test for two doubles. The visible test
// fires when |d1 - d2| exceeds atol + rtol * (|d1| + |d2|) / 2, i.e. an
// absolute floor plus a relative term scaled by the mean magnitude of the two
// values. main() stores the result in `notOK`.
// NOTE(review): the values returned on each branch are outside the visible
// lines.
130 static int diff_double(
double d1,
double d2,
double rtol,
double atol)
137 if (fabs(d1-d2) > (atol + rtol * 0.5 * (fabs(d1) + fabs(d2)))) {
// Variant of diff_double() that widens the allowed difference by a
// slope-dependent term: atol2 = xtol * (|slope1| + |slope2|) is added to the
// threshold atol + rtol * (|d1| + |d2|) / 2 before |d1 - d2| is compared
// against it.
// NOTE(review): the values returned on each branch are outside the visible
// lines.
143 static int diff_double_slope(
double d1,
double d2,
double rtol,
144 double atol,
double xtol,
double slope1,
double slope2)
// Extra absolute tolerance contributed by the local slopes of the two curves.
151 double atol2 = xtol*(fabs(slope1) + fabs(slope2));
152 if (fabs(d1-d2) > (atol + atol2 + rtol * 0.5 * (fabs(d1) + fabs(d2)))) {
// Computes a relative-difference measure for two doubles; main() compares the
// result against its running max_diff. The visible line evaluates the same
// mixed tolerance used by diff_double(): atol + rtol * (|d1| + |d2|) / 2.
// NOTE(review): how `lhs` is combined with |d1 - d2| to form the return value
// is outside the visible lines.
162 static double calc_rdiff(
double d1,
double d2,
double rtol,
double atol)
171 lhs = atol + rtol * 0.5 * (fabs(d1) + fabs(d2));
// Splits `str` into comma-separated pieces, storing a pointer to the start of
// each piece in `strlets`. The loop repeats while strchr() keeps finding a
// comma and numbreaks is below maxPieces - 1; the function returns
// numbreaks + 1.
// NOTE(review): presumably each comma found is overwritten with '\0' so that
// every piece is its own C string and `str` is modified in place -- that
// statement is outside the visible lines; confirm.
193 static int breakStrCommas(
char* str,
char** strlets,
int maxPieces)
// Locate the next comma at or after the current position.
202 cetn = strchr(cptr, (
int)
',');
// Record where the current piece begins.
206 strlets[numbreaks] = cptr;
209 }
while (cetn && (numbreaks < (maxPieces - 1)));
212 return numbreaks + 1;
// Line classification codes used by get_sizes() (in its doingLineType
// variable) while it walks through a file: title lines, then column-title
// lines, then data lines; LT_NULLLINE is assigned once a line fails the
// data-line check.
218 #define LT_NULLLINE 0
219 #define LT_TITLELINE 1
220 #define LT_COLTITLE 2
221 #define LT_DATALINE 3
// Scans a CSV file to work out its layout. Results come back through the
// reference parameters (number of title lines, number of column-title lines,
// number of columns, number of data rows) and through the caller-allocated
// flag array *ColIsFloat_ptr, which marks columns that hold floating-point
// data.
//
// The visible code makes several passes over the file with read_line().
// NOTE(review): the repositioning of `fp` between passes and the handling of
// read_line()'s return value are outside the visible lines.
// NOTE(review): ColIsFloat[j] is written for j up to nCol - 1 with no bound
// check visible here, while main() allocates the array with a fixed 200
// entries -- confirm that wider files cannot overrun it.
241 static void get_sizes(FILE* fp,
int& nTitleLines,
int& nColTitleLines,
242 int& nCol,
int& nDataRows,
int** ColIsFloat_ptr)
// At most the first nScanLinesMAX lines are used to infer the layout.
244 int nScanLinesMAX = 100;
245 int nScanLines = nScanLinesMAX;
249 char* scanLine = mdp_alloc_char_1(MAX_INPUT_STR_LN+1,
'\0');
// Per-line comma counts for the scanned lines, initialised to -1.
250 int* numCommas = mdp_alloc_int_1(nScanLinesMAX, -1);
251 int* ColIsFloat = *ColIsFloat_ptr;
// Pass 1: count the commas on each scanned line and remember the maximum.
260 for (i = 0; i < nScanLinesMAX; i++) {
261 retn = read_line(fp, scanLine, 0);
270 int ccount =
static_cast<int>(strlen(scanLine));
// A trailing comma is dropped so it does not count as an extra column.
272 if (scanLine[ccount-1] ==
',') {
273 scanLine[ccount-1] =
'\0';
279 char* cptr = scanLine;
283 cetn = strchr(cptr, (
int)
',');
290 if (maxCommas < numCommas[i]) {
291 maxCommas = numCommas[i];
// The column count is one more than the largest comma count seen.
298 nCol = maxCommas + 1;
299 if (nScanLines == 0) {
302 char** strlets = (
char**) mdp_alloc_ptr_1(maxCommas+1);
// Pass 2: on lines that have the full number of columns, test each field as a
// number in order to fill in ColIsFloat.
308 for (i = 0; i < nScanLines; i++) {
309 retn = read_line(fp, scanLine, 0);
310 int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
311 if (ncolsFound == (maxCommas + 1)) {
312 for (j = 0; j < ncolsFound; j++) {
313 char* fieldStr = strlets[j];
314 fillTokStruct(&fieldToken, fieldStr);
315 if (fieldToken.ntokes != 1) {
319 (void) tok_to_double(&fieldToken, DBL_MAX,
320 -DBL_MAX, 0.0, &rerr);
322 ColIsFloat[j] =
true;
// Pass 3: classify the scanned lines in order -- title lines first, then
// column-title lines, then data lines.
331 int doingLineType = LT_TITLELINE;
333 for (i = 0; i < nScanLines; i++) {
334 retn = read_line(fp, scanLine, 0);
339 int ccount =
static_cast<int>(strlen(scanLine));
341 if (scanLine[ccount-1] ==
',') {
342 scanLine[ccount-1] =
'\0';
345 int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
// The title section ends at the first line carrying the full comma count.
347 if (doingLineType == LT_TITLELINE) {
348 if (numCommas[i] == maxCommas) {
349 doingLineType = LT_COLTITLE;
// While in the column-title section, test whether this line already looks
// like a data line: one token per field, and float columns must parse as
// numbers.
354 if (doingLineType == LT_COLTITLE) {
355 bool goodDataLine =
true;
357 for (j = 0; j < ncolsFound; j++) {
358 char* fieldStr = strlets[j];
359 fillTokStruct(&fieldToken, fieldStr);
360 if (fieldToken.ntokes != 1) {
361 goodDataLine =
false;
364 if ((ColIsFloat[j]) == 1) {
365 (void) tok_to_double(&fieldToken, DBL_MAX,
366 -DBL_MAX, 0.0, &rerr);
368 goodDataLine =
false;
374 doingLineType = LT_DATALINE;
// Column-title lines are the lines between the title block and this line.
376 nColTitleLines = i - nTitleLines;
378 if (doingLineType == LT_DATALINE) {
// Final pass: starting at the first data line, keep reading and validating
// lines to count the data rows.
387 if (doingLineType == LT_DATALINE) {
388 for (i = nColTitleLines + nTitleLines; ; i++) {
389 retn = read_line(fp, scanLine, 0);
391 nDataRows = i - nColTitleLines - nTitleLines + 1;
398 int ccount =
static_cast<int>(strlen(scanLine));
400 if (scanLine[ccount-1] ==
',') {
401 scanLine[ccount-1] =
'\0';
404 int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
405 bool goodDataLine =
true;
407 for (j = 0; j < ncolsFound; j++) {
408 char* fieldStr = strlets[j];
409 fillTokStruct(&fieldToken, fieldStr);
410 if (fieldToken.ntokes != 1) {
411 goodDataLine =
false;
414 if (ColIsFloat[j] == 1) {
415 (void) tok_to_double(&fieldToken, DBL_MAX,
416 -DBL_MAX, 0.0, &rerr);
418 goodDataLine =
false;
// A line that fails the data-line check ends the data section.
423 if (! goodDataLine) {
424 doingLineType = LT_NULLLINE;
425 nDataRows = i - nColTitleLines - nTitleLines + 1;
// Release the scratch buffers allocated above.
430 mdp_safe_free((
void**) &strlets);
431 mdp_safe_free((
void**) &scanLine);
432 mdp_safe_free((
void**) &numCommas);
// Reads nTitleLines title lines from fp. *title is allocated as an array of
// nTitleLines pointers and entry i receives a copy (mdp_copy_string) of line
// i, with one trailing comma removed if present. The scratch line buffer is
// freed before returning; the caller is left holding *title and the copied
// strings.
// NOTE(review): the handling of read_line()'s return value is outside the
// visible lines.
441 read_title(FILE* fp,
char** *title,
int nTitleLines)
444 *title = (
char**) mdp_alloc_ptr_1(nTitleLines);
445 char* scanLine = mdp_alloc_char_1(Max_Input_Str_Ln + 1,
'\0');
446 for (
int i = 0; i < nTitleLines ; i++) {
447 retn = read_line(fp, scanLine, 0);
453 int ccount =
static_cast<int>(strlen(scanLine));
// Drop a trailing comma left at the end of the line.
455 if (scanLine[ccount-1] ==
',') {
456 scanLine[ccount-1] =
'\0';
459 (*title)[i] = mdp_copy_string(scanLine);
462 mdp_safe_free((
void**) &scanLine);
470 read_colTitle(FILE* fp,
char**** ColMLNames_ptr,
int nColTitleLines,
int nCol)
473 *ColMLNames_ptr = (
char***) mdp_alloc_ptr_1(nCol);
474 char** *ColMLNames = *ColMLNames_ptr;
475 char* scanLine = mdp_alloc_char_1(Max_Input_Str_Ln + 1,
'\0');
476 char** strlets = (
char**) mdp_alloc_ptr_1(nCol+1);
477 if (nColTitleLines > 0) {
478 for (
int i = 0; i < nColTitleLines ; i++) {
479 retn = read_line(fp, scanLine, 0);
485 int ccount =
static_cast<int>(strlen(scanLine));
487 if (scanLine[ccount-1] ==
',') {
488 scanLine[ccount-1] =
'\0';
491 int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
492 ColMLNames[i] = mdp_alloc_VecFixedStrings(nCol, MAX_TOKEN_STR_LN+1);
493 for (j = 0; j < ncolsFound; j++) {
495 strcpy(ColMLNames[i][j], strlets[j]);
500 ColMLNames[0] = mdp_alloc_VecFixedStrings(nCol, MAX_TOKEN_STR_LN+1);
501 for (j = 0; j < nCol; j++) {
503 sprintf(cbuff,
"Col_%d", j+1);
504 strcpy(ColMLNames[0][j], cbuff);
507 mdp_safe_free((
void**) &scanLine);
508 mdp_safe_free((
void**) &strlets);
// Derives a per-column absolute tolerance from a column of values. The result
// is (retn + 1.0) * atol, where `retn` is obtained from a loop over the nvals
// entries of `values`; main() passes the global gatol as the base tolerance.
// NOTE(review): the accumulation into `sum` and the formula for `retn` are
// outside the visible lines.
515 static double get_atol(
const double* values,
const int nvals,
519 double sum = 0.0, retn;
523 for (i = 0; i < nvals; i++) {
529 return (retn + 1.0) * atol;
// Reads nDataRows data lines from fp into two column-major tables:
// NSValues[j][i] receives a copy of the text of column j on row i, and
// NVValues[j][i] receives the value produced by tok_to_double() for that
// field. Lines are split with breakStrCommas() after one trailing comma is
// removed.
// NOTE(review): the parameter list continues past the visible lines (main()
// also passes a ColIsFloat array), and the conditions guarding the numeric
// conversion and the reaction to a bad line are outside the visible lines.
537 read_values(FILE* fp,
double** NVValues,
char** *NSValues,
int nCol,
int nDataRows,
540 char** strlets = (
char**) mdp_alloc_ptr_1(nCol+1);
541 char* scanLine = mdp_alloc_char_1(Max_Input_Str_Ln + 1,
'\0');
545 for (
int i = 0; i < nDataRows; i++) {
546 retn = read_line(fp, scanLine, 0);
554 int ccount =
static_cast<int>(strlen(scanLine));
// Drop a trailing comma so it is not treated as an extra, empty column.
556 if (scanLine[ccount-1] ==
',') {
557 scanLine[ccount-1] =
'\0';
560 int ncolsFound = breakStrCommas(scanLine, strlets, nCol);
561 bool goodDataLine =
true;
563 for (j = 0; j < ncolsFound; j++) {
564 char* fieldStr = strlets[j];
// Keep the raw text of every field; it is what main() compares for
// non-float columns.
565 NSValues[j][i] = mdp_copy_string(strlets[j]);
566 fillTokStruct(&fieldToken, fieldStr);
567 if (fieldToken.ntokes != 1) {
568 goodDataLine =
false;
572 value = tok_to_double(&fieldToken, DBL_MAX,
573 -DBL_MAX, 0.0, &rerr);
575 goodDataLine =
false;
578 NVValues[j][i] = value;
581 if (! goodDataLine) {
// Release the scratch buffers; the tables filled above belong to the caller.
585 mdp_safe_free((
void**) &strlets);
586 mdp_safe_free((
void**) &scanLine);
// Prints the command-line help to stdout: the synopsis, a short description
// of the comparison, the -h / -a / -r options with their default tolerances,
// and the meaning of the shell return values 0-3.
592 static void print_usage()
595 printf(
" csvdiff [-h] [-a atol] [-r rtol] File1.csv File2.csv\n");
597 printf(
"\tCompares the variable values in two Excel formatted "
598 "comma separated files.\n");
599 printf(
"\tThe comparison is done using a weighted norm basis.\n");
601 printf(
"\tThe two files should be basically equal. However, File1.csv is\n");
602 printf(
"\ttaken as the reference file that has precedence, when there is\n");
603 printf(
"\tsomething to be decided upon.\n");
605 printf(
"\t Arguments:\n");
606 printf(
"\t -h = Usage info\n");
607 printf(
"\t -a atol = Set absolute tolerance parameter - default = 1.0E-9\n");
608 printf(
"\t -r rtol = Set relative tolerance parameter - default = 1.0E-3\n");
610 printf(
"\t Shell Return Values:\n");
611 printf(
"\t 0 = Comparison was successful\n");
612 printf(
"\t 1 = One or more nodal values failed the comparison\n");
613 printf(
"\t 2 = One or more header values failed the comparison\n");
614 printf(
"\t 3 = Apples to oranges, the files can not even be compared against\n");
615 printf(
"\t one another.\n");
622 int main(
int argc,
char* argv[])
629 char* fileName1=NULL, *fileName2=NULL;
630 FILE* fp1=NULL, *fp2=NULL;
631 int nTitleLines1 = 0, nTitleLines2 = 0;
632 int nColTitleLines1 = 0, nColTitleLines2 = 0;
633 int nCol1 = 0, nCol2 = 0, nColMAX = 0, nColcomparisons = 0;
634 int nDataRows1 = 0, nDataRows2 = 0;
635 char** title1 = 0, **title2 = 0;
636 int** compColList = NULL;
637 char** *ColMLNames1 = NULL, *** ColMLNames2 = NULL;
638 char** ColNames1 = NULL, **ColNames2 = NULL;
639 double** NVValues1 = NULL, **NVValues2 = NULL;
640 char** *NSValues1 = NULL, *** NSValues2 = NULL;
641 int* ColIsFloat1 = NULL, *ColIsFloat2 = NULL;
642 double* curVarValues1 = NULL, *curVarValues2 = NULL;
643 char** curStringValues1 = NULL, **curStringValues2 = NULL;
644 int i, j, ndiff, jmax=0, i1, i2, k;
646 double max_diff, rel_diff;
647 int testPassed = RT_PASSED;
648 double atol_j, atol_arg = 0.0, rtol_arg = 0.0;
659 while ((opt_let = getopt(argc, argv,
"ha:r:")) != EOF) {
674 id = sscanf(ggg,
"%lg", &atol_arg);
676 printf(
" atol param bad: %s\n", ggg);
687 id2 = sscanf(rrr,
"%lg", &rtol_arg);
689 printf(
" rtol param bad: %s\n", rrr);
698 printf(
"default called opt_let = %c\n", opt_let);
699 fprintf(stderr,
"ERROR in command line usuage:\n");
706 if (optind != argc-2) {
710 fileName1 = argv[argc-2];
711 fileName2 = argv[argc-1];
718 printf(
"----------------------------------------------------------\n");
719 printf(
"csvdiff: CSVFile comparison utility program\n");
720 printf(
" Harry K. Moffat Div. 9114 Sandia National Labs\n");
722 printf(
" First CSV File = %s\n", fileName1);
723 printf(
" Second CSV file = %s\n", fileName2);
725 printf(
" Absolute tol = %g\n", gatol);
726 printf(
" Relative tol = %g\n", grtol);
727 printf(
"----------------------------------------------------------\n");
733 if (!(fp1 = fopen(fileName1,
"r"))) {
734 fprintf(stderr,
"Error opening up file1, %s\n", fileName1);
737 if (!(fp2 = fopen(fileName2,
"r"))) {
738 fprintf(stderr,
"Error opening up file2, %s\n", fileName2);
742 ColIsFloat1 = mdp_alloc_int_1(200, 0);
743 ColIsFloat2 = mdp_alloc_int_1(200, 0);
748 get_sizes(fp1, nTitleLines1, nColTitleLines1, nCol1, nDataRows1, &ColIsFloat1);
750 printf(
"Number of columns in file %s is zero\n", fileName1);
751 testPassed = RT_FAILED_OTHER;
752 exit(RT_FAILED_OTHER);
754 if (nDataRows1 == 0) {
755 printf(
"Number of data rows in file %s is zero\n", fileName1);
756 testPassed = RT_FAILED_OTHER;
757 exit(RT_FAILED_OTHER);
760 get_sizes(fp2, nTitleLines2, nColTitleLines2, nCol2, nDataRows2, &ColIsFloat2);
762 printf(
"Number of columns in file %s is zero\n", fileName2);
763 testPassed = RT_FAILED_OTHER;
764 exit(RT_FAILED_OTHER);
766 if (nDataRows2 == 0) {
767 printf(
"Number of data rows in file %s is zero\n", fileName2);
768 testPassed = RT_FAILED_OTHER;
769 exit(RT_FAILED_OTHER);
772 if (nTitleLines1 != nTitleLines2) {
773 printf(
"Number of Title Lines differ:, %d %d\n",nTitleLines1, nTitleLines2);
774 testPassed = RT_FAILED_OTHER;
775 }
else if (Debug_Flag) {
776 printf(
"Number of Title Lines in each file = %d\n", nTitleLines1);
778 if (nColTitleLines1 != nColTitleLines2) {
779 printf(
"Number of Column title lines differ:, %d %d\n", nColTitleLines1,
781 testPassed = RT_FAILED_OTHER;
782 }
else if (Debug_Flag) {
783 printf(
"Number of column title lines in each file = %d\n", nColTitleLines1);
790 int nDataRowsMIN = min(nDataRows1, nDataRows2);
791 int nDataRowsMAX = max(nDataRows1, nDataRows2);
792 if (nDataRows1 != nDataRows2) {
793 printf(
"Number of Data rows in file1, %d, is different than file2, %d\n",
794 nDataRows1, nDataRows2);
796 printf(
"Number of Data rows in both files = %d\n", nDataRowsMIN);
801 read_title(fp1, &title1, nTitleLines1);
802 read_title(fp2, &title2, nTitleLines2);
804 if (nTitleLines1 > 0 || nTitleLines2 > 0) {
805 int n = min(nTitleLines1, nTitleLines2);
806 for (i = 0; i < n; i++) {
807 if (strcmp(title1[i], title2[i]) != 0) {
808 printf(
"Title Line %d differ:\n\t\"%s\"\n\t\"%s\"\n", i, title1[i], title2[i]);
809 testPassed = RT_FAILED_HDR;
810 }
else if (Debug_Flag) {
811 printf(
"Title Line %d for each file: \"%s\"\n", i, title1[i]);
814 if (nTitleLines1 != nTitleLines2) {
815 printf(
"Number of Title Lines differ: %d %d\n", nTitleLines1, nTitleLines2);
816 testPassed = RT_FAILED_HDR;
819 if (nTitleLines1 != nTitleLines2) {
821 printf(
"Titles differ: title for first file: \"%s\"\n",
823 testPassed = RT_FAILED_HDR;
826 printf(
"Titles differ: title for second file: \"%s\"\n",
829 testPassed = RT_FAILED_HDR;
837 if (nCol1 != nCol2) {
838 printf(
"Number of column variables differ:, %d %d\n",
840 testPassed = RT_FAILED_OTHER;
841 }
else if (Debug_Flag) {
842 printf(
"Number of column variables in both files = %d\n",
849 read_colTitle(fp1, &ColMLNames1, nColTitleLines1, nCol1);
850 read_colTitle(fp2, &ColMLNames2, nColTitleLines2, nCol2);
851 ColNames1 = ColMLNames1[0];
852 ColNames2 = ColMLNames2[0];
858 nColMAX = max(nCol1, nCol2);
860 compColList = mdp_alloc_int_2(nColMAX, 2, -1);
862 for (i = 0; i < nCol1; i++) {
864 for (j = 0; j < nCol2; j++) {
865 if (!strcmp(ColNames1[i], ColNames2[j])) {
866 compColList[nColcomparisons][0] = i;
867 compColList[nColcomparisons][1] = j;
874 printf(
"csvdiff WARNING Variable %s (%d) in first file not found"
875 " in second file\n", ColNames1[i], i);
876 testPassed = RT_FAILED_OTHER;
879 for (j = 0; j < nCol2; j++) {
881 for (i = 0; i < nColcomparisons; i++) {
882 if (compColList[i][1] == j) {
887 printf(
"csvdiff WARNING Variable %s (%d) in second file "
888 "not found in first file\n",
890 testPassed = RT_FAILED_OTHER;
897 NVValues1 = mdp_alloc_dbl_2(nCol1, nDataRowsMAX, 0.0);
898 NVValues2 = mdp_alloc_dbl_2(nCol2, nDataRowsMAX, 0.0);
903 NSValues1 = (
char***) mdp_alloc_ptr_2(nCol1, nDataRowsMAX);
904 NSValues2 = (
char***) mdp_alloc_ptr_2(nCol2, nDataRowsMAX);
909 read_values(fp1, NVValues1, NSValues1, nCol1, nDataRows1, ColIsFloat1);
910 read_values(fp2, NVValues2, NSValues2, nCol2, nDataRows2, ColIsFloat2);
916 double slope1, slope2, xatol;
918 for (k = 0; k < nColcomparisons; k++) {
920 i1 = compColList[k][0];
921 i2 = compColList[k][1];
922 bool doFltComparison =
true;
923 if (!ColIsFloat1[i1]) {
924 doFltComparison =
false;
927 if (!ColIsFloat2[i2]) {
928 doFltComparison =
false;
931 curStringValues1 = NSValues1[i1];
932 curStringValues2 = NSValues2[i2];
935 if (doFltComparison) {
936 curVarValues1 = NVValues1[i1];
937 curVarValues2 = NVValues2[i2];
938 atol_j = get_atol(curVarValues1, nDataRows1, gatol);
939 atol_j = min(atol_j, get_atol(curVarValues2, nDataRows2, gatol));
940 for (j = 0; j < nDataRowsMIN; j++) {
944 xatol = fabs(grtol * (NVValues1[0][j] - NVValues1[0][j-1]));
945 if (j > 0 && k > 0) {
946 slope1 = (curVarValues1[j] - curVarValues1[j-1])/
947 (NVValues1[0][j] - NVValues1[0][j-1]);
948 slope2 = (curVarValues2[j] - curVarValues2[j-1])/
949 (NVValues2[0][j] - NVValues2[0][j-1]);
952 notOK = diff_double_slope(curVarValues1[j], curVarValues2[j],
953 grtol, atol_j, xatol, slope1, slope2);
955 notOK = diff_double(curVarValues1[j], curVarValues2[j],
960 rel_diff = calc_rdiff((
double) curVarValues1[j],
961 (
double) curVarValues2[j], grtol, atol_j);
962 if (rel_diff > max_diff) {
967 printf(
"\tColumn variable %s at data row %d ", ColNames1[i1], j + 1);
968 printf(
" differ: %g %g\n", curVarValues1[j],
974 for (j = 0; j < nDataRowsMIN; j++) {
975 strip(curStringValues1[j]);
976 strip(curStringValues2[j]);
978 if (strcmp(curStringValues1[j], curStringValues2[j])) {
982 printf(
"\tColumn String variable %s at data row %d ", ColNames1[i1], j + 1);
983 printf(
" differ: %s %s\n", curStringValues1[j],
984 curStringValues2[j]);
990 if (nDataRowsMIN != nDataRowsMAX) {
991 ndiff += nDataRowsMAX - nDataRowsMIN;
993 if (nDataRows1 > nDataRows2) {
994 for (j = nDataRowsMIN; j < nDataRowsMAX; j++) {
995 printf(
"\tColumn variable %s at data row %d ", ColNames1[i1], j + 1);
996 printf(
" differ: %g NA\n", curVarValues1[j]);
999 for (j = nDataRowsMIN; j < nDataRowsMAX; j++) {
1000 printf(
"\tColumn variable %s at data row %d ", ColNames1[i1], j + 1);
1001 printf(
" differ: NA %g \n", curVarValues2[j]);
1013 "Column variable %s failed comparison test for %d occurrences\n",
1014 ColNames1[i1], ndiff);
1016 printf(
" Largest difference was at data row %d ", jmax + 1);
1017 printf(
": %g %g\n", curVarValues1[jmax], curVarValues2[jmax]);
1019 testPassed = RT_FAILED_COL;
1020 }
else if (Debug_Flag) {
1021 printf(
"Column variable %s passed\n", ColNames1[i1]);