cprover
Loading...
Searching...
No Matches
c_preprocess.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module:
4
5Author: Daniel Kroening, kroening@kroening.com
6
7\*******************************************************************/
8
9#include "c_preprocess.h"
10
11#include <util/c_types.h>
12#include <util/config.h>
13#include <util/message.h>
14#include <util/prefix.h>
15#include <util/run.h>
16#include <util/suffix.h>
17#include <util/tempfile.h>
18#include <util/unicode.h>
19
20#include <fstream>
21
22static void error_parse_line(
23 const std::string &line,
24 bool warning_only,
25 messaget &message)
26{
27 std::string error_msg=line;
28 source_locationt saved_error_location;
29
30 if(has_prefix(line, "file "))
31 {
32 const char *tptr=line.c_str();
33 int state=0;
34 std::string file, line_no, column, _error_msg, function;
35
36 tptr+=5;
37
38 char previous=0;
39
40 while(*tptr!=0)
41 {
42 if(has_prefix(tptr, " line ") && state != 4)
43 {
44 state=1;
45 tptr+=6;
46 continue;
47 }
48 else if(has_prefix(tptr, " column ") && state != 4)
49 {
50 state=2;
51 tptr+=8;
52 continue;
53 }
54 else if(has_prefix(tptr, " function ") && state != 4)
55 {
56 state=3;
57 tptr+=10;
58 continue;
59 }
60 else if(*tptr==':' && state!=4)
61 {
62 if(tptr[1]==' ' && previous!=':')
63 {
64 state=4;
65 tptr++;
66 while(*tptr==' ') tptr++;
67 continue;
68 }
69 }
70
71 if(state==0) // file
72 file+=*tptr;
73 else if(state==1) // line number
74 line_no+=*tptr;
75 else if(state==2) // column
76 column+=*tptr;
77 else if(state==3) // function
78 function+=*tptr;
79 else if(state==4) // error message
80 _error_msg+=*tptr;
81
82 previous=*tptr;
83
84 tptr++;
85 }
86
87 if(state==4)
88 {
89 saved_error_location.set_file(file);
90 saved_error_location.set_function(function);
91 saved_error_location.set_line(line_no);
92 saved_error_location.set_column(column);
93 error_msg=_error_msg;
94 }
95 }
96 else if(has_prefix(line, "In file included from "))
97 {
98 }
99 else
100 {
101 const char *tptr=line.c_str();
102 int state=0;
103 std::string file, line_no;
104
105 while(*tptr!=0)
106 {
107 if(state==0)
108 {
109 if(*tptr==':')
110 state++;
111 else
112 file+=*tptr;
113 }
114 else if(state==1)
115 {
116 if(*tptr==':')
117 state++;
118 else if(isdigit(*tptr))
119 line_no+=*tptr;
120 else
121 state=3;
122 }
123
124 tptr++;
125 }
126
127 if(state==2)
128 {
129 saved_error_location.set_file(file);
130 saved_error_location.set_function(irep_idt());
131 saved_error_location.set_line(line_no);
132 saved_error_location.set_column(irep_idt());
133 }
134 }
135
137 warning_only ? message.warning() : message.error();
138 m.source_location=saved_error_location;
139 m << error_msg << messaget::eom;
140}
141
142static void error_parse(
143 std::istream &errors,
144 bool warning_only,
145 messaget &message)
146{
147 std::string line;
148
149 while(std::getline(errors, line))
150 error_parse_line(line, warning_only, message);
151}
152
155 std::istream &instream,
156 std::ostream &outstream,
157 message_handlert &message_handler)
158{
159 temporary_filet tmp_file("tmp.stdin", ".c");
160
161 std::ofstream tmp(tmp_file());
162
163 if(!tmp)
164 {
165 messaget message(message_handler);
166 message.error() << "failed to open temporary file" << messaget::eom;
167 return true; // error
168 }
169
170 tmp << instream.rdbuf(); // copy
171
172 tmp.close(); // flush
173
174 bool result=c_preprocess(tmp_file(), outstream, message_handler);
175
176 return result;
177}
178
180static bool is_dot_i_file(const std::string &path)
181{
182 return has_suffix(path, ".i") || has_suffix(path, ".ii");
183}
184
187 const std::string &, std::ostream &, message_handlert &);
189 const std::string &, std::ostream &, message_handlert &);
191 const std::string &,
192 std::ostream &,
196 const std::string &, std::ostream &, message_handlert &);
198 const std::string &, std::ostream &, message_handlert &);
199
201 const std::string &path,
202 std::ostream &outstream,
203 message_handlert &message_handler)
204{
205 switch(config.ansi_c.preprocessor)
206 {
208 return c_preprocess_codewarrior(path, outstream, message_handler);
209
211 return
213 path, outstream, message_handler, config.ansi_c.preprocessor);
214
216 return
218 path, outstream, message_handler, config.ansi_c.preprocessor);
219
221 return c_preprocess_visual_studio(path, outstream, message_handler);
222
224 return c_preprocess_arm(path, outstream, message_handler);
225
227 return c_preprocess_none(path, outstream, message_handler);
228 }
229
230 // not reached
231 return true;
232}
233
236 const std::string &file,
237 std::ostream &outstream,
238 message_handlert &message_handler)
239{
240 // check extension
242 return c_preprocess_none(file, outstream, message_handler);
243
244 messaget message(message_handler);
245
246 // use Visual Studio's CL
247
248 temporary_filet stderr_file("tmp.stderr", "");
249 temporary_filet command_file_name("tmp.cl-cmd", "");
250
251 {
252 std::ofstream command_file(command_file_name());
253
254 // This marks the command file as UTF-8, which Visual Studio
255 // understands.
256 command_file << char(0xef) << char(0xbb) << char(0xbf);
257
258 command_file << "/nologo" << '\n';
259 command_file << "/E" << '\n';
260
261 // This option will make CL produce utf-8 output, as
262 // opposed to 8-bit with some code page.
263 // It only works on Visual Studio 2015 or newer.
264 command_file << "/source-charset:utf-8" << '\n';
265
266 command_file << "/D__CPROVER__" << "\n";
267 command_file << "/D__WORDSIZE=" << config.ansi_c.pointer_width << "\n";
268
270 {
271 command_file << "\"/D__PTRDIFF_TYPE__=long long int\"" << "\n";
272 // yes, both _WIN32 and _WIN64 get defined
273 command_file << "/D_WIN64" << "\n";
274 }
275 else if(config.ansi_c.int_width == 16 && config.ansi_c.pointer_width == 32)
276 {
277 // 16-bit LP32 is an artificial architecture we simulate when using --16
280 "Pointer difference expected to be long int typed");
281 command_file << "/D__PTRDIFF_TYPE__=long" << '\n';
282 }
283 else
284 {
287 "Pointer difference expected to be int typed");
288 command_file << "/D__PTRDIFF_TYPE__=int" << "\n";
289 }
290
291 if(config.ansi_c.char_is_unsigned)
292 command_file << "/J" << "\n"; // This causes _CHAR_UNSIGNED to be defined
293
294 for(const auto &define : config.ansi_c.defines)
295 command_file << "/D" << shell_quote(define) << "\n";
296
297 for(const auto &include_path : config.ansi_c.include_paths)
298 command_file << "/I" << shell_quote(include_path) << "\n";
299
300 for(const auto &include_file : config.ansi_c.include_files)
301 command_file << "/FI" << shell_quote(include_file) << "\n";
302
303 // Finally, the file to be preprocessed
304 // (this is already in UTF-8).
305 command_file << shell_quote(file) << "\n";
306 }
307
308 // _popen isn't very reliable on WIN32
309 // that's why we use run()
310 int result =
311 run("cl", {"cl", "@" + command_file_name()}, "", outstream, stderr_file());
312
313 // errors/warnings
314 std::ifstream stderr_stream(stderr_file());
315 error_parse(stderr_stream, result==0, message);
316
317 if(result!=0)
318 {
319 message.error() << "CL Preprocessing failed" << messaget::eom;
320 return true;
321 }
322
323 return false;
324}
325
328 std::istream &instream,
329 std::ostream &outstream)
330{
331 // CodeWarrior prepends some header to the file,
332 // marked with '#' signs.
333 // We skip over it.
334 //
335 // CodeWarrior has an ugly way of marking lines, e.g.:
336 //
337 // /* #line 1 "__ppc_eabi_init.cpp" /* stack depth 0 */
338 //
339 // We remove the initial '/* ' prefix
340
341 std::string line;
342
343 while(instream)
344 {
345 std::getline(instream, line);
346
347 if(line.size()>=2 &&
348 line[0]=='#' && (line[1]=='#' || line[1]==' ' || line[1]=='\t'))
349 {
350 // skip the line!
351 }
352 else if(line.size()>=3 &&
353 line[0]=='/' && line[1]=='*' && line[2]==' ')
354 {
355 outstream << line.c_str()+3 << "\n"; // strip the '/* '
356 }
357 else
358 outstream << line << "\n";
359 }
360}
361
364 const std::string &file,
365 std::ostream &outstream,
366 message_handlert &message_handler)
367{
368 // check extension
370 return c_preprocess_none(file, outstream, message_handler);
371
372 // preprocessing
373 messaget message(message_handler);
374
375 temporary_filet stderr_file("tmp.stderr", "");
376
377 std::vector<std::string> command = {
378 "mwcceppc", "-E", "-P", "-D__CPROVER__", "-ppopt", "line", "-ppopt full"};
379
380 for(const auto &define : config.ansi_c.defines)
381 command.push_back(" -D" + define);
382
383 for(const auto &include_path : config.ansi_c.include_paths)
384 command.push_back(" -I" + include_path);
385
386 for(const auto &include_file : config.ansi_c.include_files)
387 {
388 command.push_back(" -include");
389 command.push_back(include_file);
390 }
391
392 for(const auto &opt : config.ansi_c.preprocessor_options)
393 command.push_back(opt);
394
395 temporary_filet tmpi("tmp.cl", "");
396 command.push_back(file);
397 command.push_back("-o");
398 command.push_back(tmpi());
399
400 int result = run(command[0], command, "", "", stderr_file());
401
402 std::ifstream stream_i(tmpi());
403
404 if(stream_i)
405 {
406 postprocess_codewarrior(stream_i, outstream);
407
408 stream_i.close();
409 }
410 else
411 {
412 message.error() << "Preprocessing failed (fopen failed)"
413 << messaget::eom;
414 return true;
415 }
416
417 // errors/warnings
418 std::ifstream stderr_stream(stderr_file());
419 error_parse(stderr_stream, result==0, message);
420
421 if(result!=0)
422 {
423 message.error() << "Preprocessing failed" << messaget::eom;
424 return true;
425 }
426
427 return false;
428}
429
432 const std::string &file,
433 std::ostream &outstream,
434 message_handlert &message_handler,
436{
437 // check extension
439 return c_preprocess_none(file, outstream, message_handler);
440
441 // preprocessing
442 messaget message(message_handler);
443
444 temporary_filet stderr_file("tmp.stderr", "");
445
446 std::vector<std::string> argv;
447
449 argv.push_back("clang");
450 else
451 argv.push_back("gcc");
452
453 argv.push_back("-E");
454 argv.push_back("-D__CPROVER__");
455
456 const irep_idt &arch = config.ansi_c.arch;
457
458 if(config.ansi_c.pointer_width == 16)
459 {
460 if(arch == "i386" || arch == "x86_64" || arch == "x32")
461 argv.push_back("-m16");
462 else if(arch.starts_with("mips"))
463 argv.push_back("-mips16");
464 }
465 else if(config.ansi_c.pointer_width == 32)
466 {
467 if(arch == "i386" || arch == "x86_64")
468 argv.push_back("-m32");
469 else if(arch == "x32")
470 argv.push_back("-mx32");
471 else if(arch.starts_with("mips"))
472 argv.push_back("-mabi=32");
473 else if(arch == "powerpc" || arch == "ppc64" || arch == "ppc64le")
474 argv.push_back("-m32");
475 else if(arch == "s390" || arch == "s390x")
476 argv.push_back("-m31"); // yes, 31, not 32!
477 else if(arch == "sparc" || arch == "sparc64")
478 argv.push_back("-m32");
479 }
480 else if(config.ansi_c.pointer_width == 64)
481 {
482 if(arch == "i386" || arch == "x86_64" || arch == "x32")
483 argv.push_back("-m64");
484 else if(arch.starts_with("mips"))
485 argv.push_back("-mabi=64");
486 else if(arch == "powerpc" || arch == "ppc64" || arch == "ppc64le")
487 argv.push_back("-m64");
488 else if(arch == "s390" || arch == "s390x")
489 argv.push_back("-m64");
490 else if(arch == "sparc" || arch == "sparc64")
491 argv.push_back("-m64");
492 }
493
494 // The width of wchar_t depends on the OS!
495 if(config.ansi_c.wchar_t_width == config.ansi_c.short_int_width)
496 argv.push_back("-fshort-wchar");
497
498 if(config.ansi_c.char_is_unsigned)
499 argv.push_back("-funsigned-char");
500
501 if(config.ansi_c.os == configt::ansi_ct::ost::NO_OS)
502 argv.push_back("-nostdinc");
503
504 // Set the standard
505 if(
506 has_suffix(file, ".cpp") || has_suffix(file, ".CPP") ||
507#ifndef _WIN32
508 has_suffix(file, ".C") ||
509#endif
510 has_suffix(file, ".c++") || has_suffix(file, ".C++") ||
511 has_suffix(file, ".cp") || has_suffix(file, ".CP") ||
512 has_suffix(file, ".cc") || has_suffix(file, ".cxx"))
513 {
514 switch(config.cpp.cpp_standard)
515 {
517#if defined(__OpenBSD__)
519 argv.push_back("-std=c++98");
520 else
521#endif
522 argv.push_back("-std=gnu++98");
523 break;
524
526#if defined(__OpenBSD__)
528 argv.push_back("-std=c++03");
529 else
530#endif
531 argv.push_back("-std=gnu++03");
532 break;
533
535#if defined(__OpenBSD__)
537 argv.push_back("-std=c++11");
538 else
539#endif
540 argv.push_back("-std=gnu++11");
541 break;
542
544#if defined(__OpenBSD__)
546 argv.push_back("-std=c++14");
547 else
548#endif
549 argv.push_back("-std=gnu++14");
550 break;
551
553#if defined(__OpenBSD__)
555 argv.push_back("-std=c++17");
556 else
557#endif
558 argv.push_back("-std=gnu++17");
559 break;
560 }
561 }
562 else
563 {
564 switch(config.ansi_c.c_standard)
565 {
567#if defined(__OpenBSD__)
569 argv.push_back("-std=c89");
570 else
571#endif
572 argv.push_back("-std=gnu89");
573 break;
574
576#if defined(__OpenBSD__)
578 argv.push_back("-std=c99");
579 else
580#endif
581 argv.push_back("-std=gnu99");
582 break;
583
585#if defined(__OpenBSD__)
587 argv.push_back("-std=c11");
588 else
589#endif
590 argv.push_back("-std=gnu11");
591 break;
592
594#if defined(__OpenBSD__)
596 argv.push_back("-std=c17");
597 else
598#endif
599 argv.push_back("-std=gnu17");
600 break;
601
603#if defined(__OpenBSD__)
605 argv.push_back("-std=c2x");
606 else
607#endif
608 argv.push_back("-std=gnu2x");
609 break;
610 }
611 }
612
613 for(const auto &define : config.ansi_c.defines)
614 argv.push_back("-D" + define);
615
616 for(const auto &include_path : config.ansi_c.include_paths)
617 argv.push_back("-I" + include_path);
618
619 for(const auto &include_file : config.ansi_c.include_files)
620 {
621 argv.push_back("-include");
622 argv.push_back(include_file);
623 }
624
625 for(const auto &opt : config.ansi_c.preprocessor_options)
626 argv.push_back(opt);
627
628 int result;
629
630 #if 0
631 // the following forces the mode
632 switch(config.ansi_c.mode)
633 {
634 case configt::ansi_ct::flavourt::GCC_C: command+=" -x c"; break;
635 case configt::ansi_ct::flavourt::GCC_CPP: command+=" -x c++"; break;
636 default:
637 {
638 }
639 }
640 #endif
641
642 // the file that is to be preprocessed
643 argv.push_back(file);
644
645 // execute clang or gcc
646 result = run(argv[0], argv, "", outstream, stderr_file());
647
648 // errors/warnings
649 std::ifstream stderr_stream(stderr_file());
650 error_parse(stderr_stream, result==0, message);
651
652 if(result!=0)
653 {
654 message.error() << "GCC preprocessing failed" << messaget::eom;
655 return true;
656 }
657
658 return false;
659}
660
663 const std::string &file,
664 std::ostream &outstream,
665 message_handlert &message_handler)
666{
667 // check extension
669 return c_preprocess_none(file, outstream, message_handler);
670
671 // preprocessing using armcc
672 messaget message(message_handler);
673
674 temporary_filet stderr_file("tmp.stderr", "");
675
676 std::vector<std::string> argv;
677
678 argv.push_back("armcc");
679 argv.push_back("-E");
680 argv.push_back("-D__CPROVER__");
681
683 argv.push_back("--bigend");
684 else
685 argv.push_back("--littleend");
686
687 if(config.ansi_c.char_is_unsigned)
688 argv.push_back("--unsigned_chars");
689 else
690 argv.push_back("--signed_chars");
691
692 // Set the standard
693 // https://developer.arm.com/documentation/101458/2404/Standards-support/Supported-C-C---standards-in-Arm-C-C---Compiler
694 switch(config.ansi_c.c_standard)
695 {
697 argv.push_back("--c90");
698 break;
699
701 argv.push_back("--c99");
702 break;
703
705 argv.push_back("--c11");
706 break;
707
709 argv.push_back("--c17");
710 break;
711
713 // C23 is not yet supported by armcc
714 argv.push_back("--c17");
715 break;
716 }
717
718 for(const auto &define : config.ansi_c.defines)
719 argv.push_back("-D" + define);
720
721 for(const auto &include_path : config.ansi_c.include_paths)
722 argv.push_back("-I" + include_path);
723
724 // the file that is to be preprocessed
725 argv.push_back(file);
726
727 int result;
728
729 // execute armcc
730 result = run(argv[0], argv, "", outstream, stderr_file());
731
732 // errors/warnings
733 std::ifstream stderr_stream(stderr_file());
734 error_parse(stderr_stream, result==0, message);
735
736 if(result!=0)
737 {
738 message.error() << "ARMCC preprocessing failed" << messaget::eom;
739 return true;
740 }
741
742 return false;
743}
744
747 const std::string &file,
748 std::ostream &outstream,
749 message_handlert &message_handler)
750{
751 std::ifstream infile(widen_if_needed(file));
752
753 if(!infile)
754 {
755 messaget message(message_handler);
756 message.error() << "failed to open '" << file << "'" << messaget::eom;
757 return true;
758 }
759
761 {
762 // special treatment for "/* #line"
763 postprocess_codewarrior(infile, outstream);
764 }
765 else
766 {
767 char ch;
768
769 while(infile.read(&ch, 1))
770 outstream << ch;
771 }
772
773 return false;
774}
775
777const char c_test_program[]=
778 "#include <stdlib.h>\n"
779 "\n"
780 "int main() { }\n";
781
783{
784 std::ostringstream out;
785 std::istringstream in(c_test_program);
786
787 return c_preprocess(in, out, message_handler);
788}
configt config
Definition config.cpp:25
bool c_preprocess_visual_studio(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
bool c_preprocess_arm(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
bool c_preprocess_none(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
static void error_parse(std::istream &errors, bool warning_only, messaget &message)
const char c_test_program[]
tests ANSI-C preprocessing
bool c_preprocess_codewarrior(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
static void error_parse_line(const std::string &line, bool warning_only, messaget &message)
void postprocess_codewarrior(std::istream &instream, std::ostream &outstream)
post-processing specifically for CodeWarrior
bool c_preprocess(std::istream &instream, std::ostream &outstream, message_handlert &message_handler)
ANSI-C preprocessing.
bool test_c_preprocessor(message_handlert &message_handler)
bool c_preprocess_gcc_clang(const std::string &, std::ostream &, message_handlert &, configt::ansi_ct::preprocessort)
ANSI-C preprocessing.
static bool is_dot_i_file(const std::string &path)
ANSI-C preprocessing.
signedbv_typet signed_long_int_type()
Definition c_types.cpp:72
signedbv_typet signed_int_type()
Definition c_types.cpp:22
signedbv_typet pointer_diff_type()
Definition c_types.cpp:220
signedbv_typet signed_long_long_int_type()
Definition c_types.cpp:79
bool starts_with(const char *s) const
equivalent of as_string().starts_with(s)
Definition dstring.h:95
source_locationt source_location
Definition message.h:239
Class that provides messages with a built-in verbosity 'level'.
Definition message.h:154
mstreamt & error() const
Definition message.h:391
mstreamt & warning() const
Definition message.h:396
static eomt eom
Definition message.h:289
void set_column(const irep_idt &column)
void set_file(const irep_idt &file)
void set_line(const irep_idt &line)
void set_function(const irep_idt &function)
bool has_prefix(const std::string &s, const std::string &prefix)
Definition converter.cpp:13
int run(const std::string &what, const std::vector< std::string > &argv)
Definition run.cpp:48
std::string shell_quote(const std::string &src)
quote a string for bash and CMD
Definition run.cpp:451
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition invariant.h:534
Definition kdev_t.h:19
bool has_suffix(const std::string &s, const std::string &suffix)
Definition suffix.h:17
#define widen_if_needed(s)
Definition unicode.h:28
dstringt irep_idt