Cantera  2.0
stringUtils.cpp
Go to the documentation of this file.
1 /**
2  * @file stringUtils.cpp
3  * Contains definitions for string manipulation functions
4  * within Cantera.
5  */
6 // Copyright 2001 California Institute of Technology
7 
8 //@{
9 #include "cantera/base/ct_defs.h"
10 
11 #ifdef _MSC_VER
12 #define SNPRINTF _snprintf
13 #else
14 #define SNPRINTF snprintf
15 #endif
16 //@}
17 
20 #include "cantera/base/global.h"
21 #include "cantera/base/ctml.h"
22 
#include <string>
#include <sstream>
#include <vector>
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <cctype>
29 
30 namespace Cantera
31 {
32 
33 //================================================================================================
34 // Convert a double into a c++ string
35 /*
36  * This routine doesn't assume a formatting. You
37  * must supply the formatting
38  *
39  * @param x double to be converted
40  * @param fmt Format to be used (printf style)
41  */
42 std::string fp2str(const double x, const std::string& fmt)
43 {
44  char buf[64];
45  int n = SNPRINTF(buf, 63, fmt.c_str(), x);
46  if (n > 0) {
47  buf[63] = '\0';
48  return std::string(buf);
49  }
50  return std::string(" ");
51 }
52 std::string fp2str(const double x)
53 {
54  char buf[64];
55  int n = SNPRINTF(buf, 64, "%g" , x);
56  if (n > 0) {
57  buf[29] = '\0';
58  return std::string(buf);
59  }
60  return std::string(" ");
61 }
62 //================================================================================================
63 /*
64  * Convert an integer number to a std::string using sprintf.
65  */
66 std::string int2str(const int n, const std::string& fmt)
67 {
68  char buf[30];
69  int m = SNPRINTF(buf, 30, fmt.c_str(), n);
70  if (m > 0) {
71  buf[29] = '\0';
72  return std::string(buf);
73  }
74  return std::string(" ");
75 }
76 //================================================================================================
77 // Convert an int to a string
78 /*
79  * @param n int to be converted
80  */
81 std::string int2str(const int n)
82 {
83  char buf[30];
84  int m = SNPRINTF(buf, 30, "%d", n);
85  if (m > 0) {
86  buf[29] = '\0';
87  return std::string(buf);
88  }
89  return std::string(" ");
90 }
91 //================================================================================================
// Convert an unsigned size value to a string
/*
 * The value is written through a string stream.
 *
 * @param n size_t value to be converted
 * @return  Returns the text produced by streaming n.
 */
std::string int2str(const size_t n)
{
    std::ostringstream out;
    out << n;
    return out.str();
}
102 //================================================================================================
// Return a lower case copy of a string
/*
 * Every character is passed through tolower(). Characters are converted
 * to unsigned char first, because tolower() is only defined for values
 * representable as unsigned char (and EOF); a plain char with the high
 * bit set would otherwise be passed as a negative number.
 *
 * @param s Input string
 * @return  Returns a copy of s with each character converted by tolower()
 */
std::string lowercase(const std::string& s)
{
    std::string lc(s);
    for (std::string::size_type i = 0; i < lc.size(); i++) {
        lc[i] = static_cast<char>(std::tolower(static_cast<unsigned char>(lc[i])));
    }
    return lc;
}
112 //================================================================================================
//! Return the position of the first printable, non-blank
//! character in the string
/*!
 * A character counts when it is not a blank and isprint() accepts it.
 * Characters are converted to unsigned char before they are classified,
 * as required by isprint().
 *
 *  @param  s    input string
 *  @return      Returns an int representing the position of the first
 *               printable, non-blank character. If there is none,
 *               returns the size of the string.
 */
static int firstChar(const std::string& s)
{
    const int n = static_cast<int>(s.size());
    for (int i = 0; i < n; i++) {
        const unsigned char ch = static_cast<unsigned char>(s[i]);
        if (ch != ' ' && std::isprint(ch)) {
            return i;
        }
    }
    return n;
}
132 //================================================================================================
//! Return the position of the last printable, non-blank
//! character in the string
/*!
 * A character counts when it is not a blank and isprint() accepts it.
 * Characters are converted to unsigned char before they are classified,
 * as required by isprint().
 *
 *  @param  s    input string
 *  @return      Returns an int representing the position of the last
 *               printable, non-blank character. If there is none,
 *               returns -1.
 */
static int lastChar(const std::string& s)
{
    for (int i = static_cast<int>(s.size()) - 1; i >= 0; i--) {
        const unsigned char ch = static_cast<unsigned char>(s[i]);
        if (ch != ' ' && std::isprint(ch)) {
            return i;
        }
    }
    return -1;
}
151 //================================================================================================
152 // Strip the leading and trailing white space
153 // from a string
154 /*
155  * The command isprint() is used to determine printable
156  * characters.
157  *
158  * @param s Input string
159  * @return Returns a copy of the string, stripped
160  * of leading and trailing white space
161  */
162 std::string stripws(const std::string& s)
163 {
164  int ifirst = firstChar(s);
165  int ilast = lastChar(s);
166  return s.substr(ifirst, ilast - ifirst + 1);
167 }
168 //================================================================================================
// Strip non-printing characters wherever they are
/*
 * Characters are converted to unsigned char before they are handed to
 * isprint(), as required by that function.
 *
 * @param s        Input string
 * @return         Returns a copy of the string,
 *                 stripped of all non-printing characters.
 */
std::string stripnonprint(const std::string& s)
{
    std::string printable;
    printable.reserve(s.size());
    for (std::string::size_type i = 0; i < s.size(); i++) {
        if (std::isprint(static_cast<unsigned char>(s[i]))) {
            printable += s[i];
        }
    }
    return printable;
}
187 //================================================================================================
188 // Parse a composition string into a map consisting of individual key:composition
189 // pairs.
190 /*
191  * The composition is a double.
192  * Example
193  *
194  * Input is
195  *
196  * "fire:0 ice:1 snow:2"
197  *
198  * Output is
199  * x["fire"] = 0
200  * x["ice"] = 1
201  * x["snow"] = 2
202  *
203  * @param ss original string consisting of multiple key:composition
204  * pairs on multiple lines
205  * @param x Output map consisting of a composition
206  * map, which is a string to double map
207  */
208 void parseCompString(const std::string& ss, Cantera::compositionMap& x)
209 {
210  std::string s = ss;
211  std::string::size_type icolon, ibegin, iend;
212  std::string name, num, nm;
213  do {
214  ibegin = s.find_first_not_of(", ;\n\t");
215  if (ibegin != std::string::npos) {
216  s = s.substr(ibegin,s.size());
217  icolon = s.find(':');
218  iend = s.find_first_of(", ;\n\t");
219  //icomma = s.find(',');
220  if (icolon != std::string::npos) {
221  name = s.substr(0, icolon);
222  if (iend != std::string::npos) {
223  num = s.substr(icolon+1, iend-icolon);
224  s = s.substr(iend+1, s.size());
225  } else {
226  num = s.substr(icolon+1, s.size());
227  s = "";
228  }
229  nm = stripws(name);
230  if (x.find(nm) == x.end()) {
231  throw CanteraError("parseCompString",
232  "unknown species " + nm);
233  }
234  x[nm] = atof(num.c_str());
235  } else {
236  s = "";
237  }
238  }
239  } while (s != "");
240 }
241 //================================================================================================
// Parse a composition string into individual key:composition
// pairs
/*
 *  The string is cut at every run of the separator characters comma,
 *  blank, semicolon, newline and tab. The pieces are appended to w; w is
 *  not cleared first.
 *
 *  Input that ends in more than one separator, or that consists of
 *  separators only, used to loop forever; such input now simply yields
 *  no further items.
 *
 *     @param ss   original string consisting of multiple key:composition
 *                 pairs on multiple lines
 *     @param w    Output vector consisting of single key:composition
 *                 items in each index.
 */
void split(const std::string& ss, std::vector<std::string>& w)
{
    static const char* const delims = ", ;\n\t";
    std::string::size_type ibegin = ss.find_first_not_of(delims);
    while (ibegin != std::string::npos) {
        const std::string::size_type iend = ss.find_first_of(delims, ibegin);
        if (iend == std::string::npos) {
            // Last item: it runs to the end of the string.
            w.push_back(ss.substr(ibegin));
            return;
        }
        w.push_back(ss.substr(ibegin, iend - ibegin));
        ibegin = ss.find_first_not_of(delims, iend + 1);
    }
}
271 //================================================================================================
272 int fillArrayFromString(const std::string& str,
273  doublereal* const a, const char delim)
274 {
275  std::string::size_type iloc;
276  int count = 0;
277  std::string num;
278  std::string s = str;
279  while (s.size() > 0) {
280  iloc = s.find(delim);
281  if (iloc > 0) {
282  num = s.substr(0, iloc);
283  s = s.substr(iloc+1,s.size());
284  } else {
285  num = s;
286  s = "";
287  }
288  a[count] = atofCheck(num.c_str());
289  count++;
290  }
291  return count;
292 }
293 //================================================================================================
// Get the file name without the path or extension
/*
 *  The directory part is everything up to and including the last '/'.
 *  The extension is everything from the last '.' of the remaining file
 *  name on; a file name that starts with its only '.' is kept whole.
 *
 *  Compared with the previous implementation, a '/' at the very start of
 *  the path and a '.' inside a directory name are now handled correctly,
 *  and the directory part is removed also when the file name has no
 *  extension.
 *
 *   @param path   Input file name consisting
 *                 of the full file name
 *
 *  @return Returns the basename
 */
std::string getBaseName(const std::string& path)
{
    const std::string::size_type islash = path.find_last_of('/');
    const std::string file =
        (islash == std::string::npos) ? path : path.substr(islash + 1);
    const std::string::size_type idot = file.find_last_of('.');
    if (idot == std::string::npos || idot == 0) {
        return file;
    }
    return file.substr(0, idot);
}
317 //================================================================================================
318 int intValue(std::string val)
319 {
320  return std::atoi(stripws(val).c_str());
321 }
322 //================================================================================================
323 doublereal fpValue(std::string val)
324 {
325  return std::atof(stripws(val).c_str());
326 }
327 //================================================================================================
328 doublereal fpValueCheck(std::string val)
329 {
330  return atofCheck(stripws(val).c_str());
331 }
332 //================================================================================================
333 // Generate a logfile name based on an input file name
334 /*
335  * It tries to find the basename. Then, it appends a .log
336  * to it.
337  *
338  * @param infile Input file name
339  *
340  * @return Returns a logfile name
341  */
342 std::string logfileName(const std::string& infile)
343 {
344  std::string logfile = getBaseName(infile);
345  logfile += ".log";
346  return logfile;
347 }
348 //================================================================================================
// Line wrap a string via a copy operation
/*
 *  Characters are counted from the last newline. A blank that is reached
 *  after more than len characters is preceded by a line break sequence
 *  in the copy; the blank itself is kept. Lines are therefore only
 *  broken at blanks.
 *
 *   @param s    Input string to be line wrapped
 *   @param len  Length at which to wrap
 *   @return     Returns the wrapped copy of s
 */
std::string wrapString(const std::string& s, const int len)
{
    std::string wrapped;
    int column = 0;
    for (std::string::const_iterator it = s.begin(); it != s.end(); ++it) {
        const char ch = *it;
        column = (ch == '\n') ? 0 : column + 1;
        if (ch == ' ' && column > len) {
            wrapped += "\n ";
            column = 0;
        }
        wrapped += ch;
    }
    return wrapped;
}
373 //================================================================================================
374 // Parse a name string, separating out the phase name from the species name
375 /*
376  * Name strings must not contain these internal characters "; \n \t "
377  * Only one colon is allowed, the one separating the phase name from the
378  * species name. Therefore, names may not include a colon.
379  *
380  * @param nameStr (input) Name string containing the phase name and the species
381  * name separated by a colon. The phase name is optional.
382  * example: "silane:SiH4"
383  * @param phaseName (output) Name of the phase, if specified. If not specified,
384  * a blank string is returned.
385  * @return (output) Species name is returned. If nameStr is blank
386  * an empty string is returned.
387  */
388 std::string parseSpeciesName(const std::string& nameStr, std::string& phaseName)
389 {
390  std::string s = stripws(nameStr);
391  std::string::size_type ibegin, iend, icolon;
392  phaseName = "";
393  ibegin = s.find_first_not_of(" ;\n\t");
394  if (ibegin != std::string::npos) {
395  s = s.substr(ibegin,s.size());
396  icolon = s.find(':');
397  iend = s.find_first_of(" ;\n\t");
398  if (icolon != std::string::npos) {
399  phaseName = s.substr(0, icolon);
400  s = s.substr(icolon+1, s.size());
401  icolon = s.find(':');
402  if (icolon != std::string::npos) {
403  throw CanteraError("parseSpeciesName()", "two colons in name: " + nameStr);
404  }
405  }
406  if (iend != std::string::npos) {
407  throw CanteraError("parseSpeciesName()",
408  "Species name has \" ;/\n/\t\" in the middle of it: " + nameStr);
409  }
410  }
411  return s;
412 }
413 //================================================================================================
// Routine strips off white space from a c character string
/*
 *     This routine strips off leading and trailing white space (as judged
 *     by isspace()) in 'str'. The string is modified in place. On return,
 *     it returns the number of characters still included in the string
 *     (excluding the null character).
 *
 *     The copy loop can stop at a comment character, but that character
 *     is currently set to '\0', so no comment stripping takes place.
 *
 *     Characters are converted to unsigned char before they are handed
 *     to isspace(), as required by that function.
 *
 *     @param  str   On output 'str' contains the same characters as on
 *                   input except the leading and trailing white space
 *                   have been removed. A null pointer is accepted.
 *     @return       Returns the length of the stripped string; 0 for a
 *                   null pointer or an empty string.
 */
int stripLTWScstring(char str[])
{
    int i = 0, j = 0;
    char ch;
    // Comment character. '\0' means that comment handling is switched off.
    const char COM_CHAR='\0';
    /*
     *    Quick Returns
     */
    if ((str == 0) || (str[0] == '\0')) {
        return (0);
    }

    /* Find first non-space character */
    while (((ch = str[i]) != '\0') &&
            std::isspace(static_cast<unsigned char>(ch))) {
        i++;
    }

    /*
     * Move real part of str to the front by copying the string
     *   - The copy ends at the terminator or at the comment character,
     *     whichever comes first, and the null character is inserted at
     *     that point.
     */
    while ((ch = str[j+i]) != '\0' &&
            (ch != COM_CHAR)) {
        str[j] = ch;
        j++;
    }
    str[j] = '\0';
    j--;
    /* Remove trailing white space by inserting a null character */
    while ((j != -1) && std::isspace(static_cast<unsigned char>(str[j]))) {
        j--;
    }
    j++;
    str[j] = '\0';
    return (j);
}
469 //================================================================================================
470 // Translate a char string into a single double
471 /*
472  * atofCheck is a wrapper around the C stdlib routine atof().
473  * It does quite a bit more error checking than atof() or
474  * strtod(), and is quite a bit more restrictive.
475  *
476  * First it interprets both E, e, d, and D as exponents.
477  * atof() only interprets e or E as an exponent character.
478  *
479  * It only accepts a string as well formed if it consists as a
480  * single token. Multiple words will produce an error message
481  *
482  * It will produce an error for NAN and inf entries as well,
483  * in contrast to atof() or strtod().
484  * The user needs to know that a serious numerical issue
485  * has occurred.
486  *
487  * It does not accept hexadecimal numbers.
488  *
489  * @param dptr pointer to the input c string
490  * @return Returns the double
491  *
492  * On any error, it will throw a CanteraError signal.
493  */
494 doublereal atofCheck(const char* const dptr)
495 {
496  if (!dptr) {
497  throw CanteraError("atofCheck", "null pointer to string");
498  }
499  char* eptr = (char*) malloc(strlen(dptr)+1);
500  strcpy(eptr, dptr);
501  int ll = stripLTWScstring(eptr);
502  if (ll == 0) {
503  throw CanteraError("atofCheck", "string has zero length");
504  }
505  int numDot = 0;
506  int numExp = 0;
507  char ch;
508  int istart = 0;
509  ch = eptr[0];
510  if (ch == '+' || ch == '-') {
511  istart = 1;
512  }
513  for (int i = istart; i < ll; i++) {
514  ch = eptr[i];
515  if (isdigit(ch)) {
516  } else if (ch == '.') {
517  numDot++;
518  if (numDot > 1) {
519  free(eptr);
520  throw CanteraError("atofCheck",
521  "string has more than one .");
522  }
523  } else if (ch == 'e' || ch == 'E' || ch == 'd' || ch == 'D') {
524  numExp++;
525  eptr[i] = 'E';
526  if (numExp > 1) {
527  free(eptr);
528  throw CanteraError("atofCheck",
529  "string has more than one exp char");
530  }
531  ch = eptr[i+1];
532  if (ch == '+' || ch == '-') {
533  i++;
534  }
535  } else {
536  std::string hh(dptr);
537  free(eptr);
538  throw CanteraError("atofCheck",
539  "Trouble processing string, " + hh);
540  }
541  }
542  doublereal rval = atof(eptr);
543  free(eptr);
544  return rval;
545 }
546 //================================================================================================
547 // Interpret one or two token string as a single double
548 /*
549  * This is similar to atof(). However, the second token
550  * is interpreted as an MKS units string and a conversion
551  * factor to MKS is applied.
552  *
553  * Example
554  * " 1.0 atm"
555  *
556  * results in the number 1.01325e5
557  *
558  * @param strSI string to be converted. One or two tokens
559  *
560  * @return returns a converted double
561  */
562 doublereal strSItoDbl(const std::string& strSI)
563 {
564  std::vector<std::string> v;
565  tokenizeString(strSI, v);
566  doublereal fp = 1.0;
567  size_t n = v.size();
568  if (n > 2 || n < 1) {
569  throw CanteraError("strSItoDbl",
570  "number of tokens is too high");
571  } else if (n == 2) {
572  fp = toSI(v[1]);
573  }
574  doublereal val = atofCheck(v[0].c_str());
575  return (val * fp);
576 }
577 //================================================================================================
//! Find the first white space in a string
/*!
 *   Returns the location of the first white space character in a string,
 *   as judged by isspace(). Characters are converted to unsigned char
 *   before they are classified, as required by isspace().
 *
 *   @param   val    Input string to be parsed
 *   @return  In a size_type variable, return the location of the first white space character.
 *             Return npos if none is found
 */
static std::string::size_type findFirstWS(const std::string& val)
{
    for (std::string::size_type pos = 0; pos < val.size(); ++pos) {
        if (std::isspace(static_cast<unsigned char>(val[pos]))) {
            return pos;
        }
    }
    return std::string::npos;
}
602 //================================================================================================
//! Find the first non-white space in a string
/*!
 *   Returns the location of the first non-white space character in a
 *   string, as judged by isspace(). Characters are converted to unsigned
 *   char before they are classified, as required by isspace().
 *
 *   @param   val    Input string to be parsed
 *   @return  In a size_type variable, return the location of the first nonwhite space character.
 *             Return npos if none is found
 */
static std::string::size_type findFirstNotOfWS(const std::string& val)
{
    for (std::string::size_type pos = 0; pos < val.size(); ++pos) {
        if (!std::isspace(static_cast<unsigned char>(val[pos]))) {
            return pos;
        }
    }
    return std::string::npos;
}
627 //================================================================================================
628 // This function separates a string up into tokens
629 // according to the location of white space.
630 /*
631  * The separate tokens are returned in a string vector, v.
632  *
633  * @param oval String to be broken up
634  * @param v Output vector of tokens.
635  */
636 void tokenizeString(const std::string& oval,
637  std::vector<std::string>& v)
638 {
639  std::string val(oval);
640  std::string::size_type ibegin, iend;
641  v.clear();
642  while (1 > 0) {
643  ibegin = findFirstNotOfWS(val);
644  if (ibegin != std::string::npos) {
645  val = val.substr(ibegin,val.size());
646  iend = findFirstWS(val);
647  if (iend == std::string::npos) {
648  v.push_back(val);
649  break;
650  } else {
651  v.push_back(val.substr(0,iend));
652  val = val.substr(iend+1,val.size());
653  }
654  } else {
655  break;
656  }
657  }
658 }
659 //================================================================================================
660 
661 }