Formally, C compilation proceeds in 8 phases:

Phase 1: multibyte characters are mapped to source characters and
trigraphs are translated.

Phase 2: backslash-newline is deleted (backslash-newline-EOF is error).

Phase 3: break source into preprocessing tokens and whitespace.  A file
shall not end in partial pptoken or partial comment.  Comments are
conceptually replaced by single spaces.

Phase 4: preprocessing directives (including #include) are executed
and macro invocations are expanded.

Phase 5: character constants and strings are converted from the source
character set to the execution character set.

Phase 6: adjacent string literals are concatenated.

Phase 7: preprocessing tokens are converted to tokens, including
discarding whitespace.  Parsing and compilation proper happen.

Phase 8: linking.

Some of these are ignorable for us: phase 1, because we support neither
multibyte source characters nor trigraphs, and phase 5, because our
source and execution character sets are identical.

header name
	< h-char+ >
	" q-char+ "
	h-char = any char except newline and >
	q-char = any char except newline and "
identifier:
	[A-Za-z_][A-Za-z_0-9]*
ppnumber
	[.]?[0-9]([0-9a-zA-Z_]|[eEpP][-+]|[.])*
charconst
string
punct
other single non-whitespace character
(placemarker - see 6.10)


integer-constant
	[1-9][0-9]*$SUFFIX
	0[0-7]*$SUFFIX
	0[xX][0-9a-fA-F]+$SUFFIX
	$SUFFIX = {,[uU],[lL],ll,LL,[uU][lL],[lL][uU],[uU]ll,[uU]LL,ll[uU],LL[uU]}
floating-constant
	decimal-floating-constant
	hexadecimal-floating-constant

	decimal-floating-constant =
		fractional-constant exponent-part-opt floating-suffix-opt
		digit-sequence exponent-part floating-suffix-opt
	hexadecimal-floating-constant =
		hexadecimal-prefix hexadecimal-fractional-constant binary-exponent-part floating-suffix-opt
		hexadecimal-prefix hexadecimal-digit-sequence binary-exponent-part floating-suffix-opt
	fractional-constant =
		digit-sequence-opt . digit-sequence
		digit-sequence .
	exponent-part =
		e sign-opt digit-sequence
		E sign-opt digit-sequence
	sign =
		+ or -
	digit-sequence =
		[0-9]+
	hexadecimal-fractional-constant =
		hexadecimal-digit-sequence-opt . hexadecimal-digit-sequence
		hexadecimal-digit-sequence .
	binary-exponent-part =
		[pP] sign-opt digit-sequence
	hexadecimal-digit-sequence =
		[0-9a-fA-F]+
	floating-suffix =
		[fFlL]
enum-constant
	identifier
character-constant
	' c-char-sequence '

	c-char-sequence =
		c-char+
	c-char =
		any char except ', \, newline
		escape-sequence
	escape-sequence =
		simple-escape-sequence
		octal-escape-sequence
		hexadecimal-escape-sequence
		universal-escape-sequence
	simple-escape-sequence =
		\['"?\abfnrtv]
	octal-escape-sequence =
		\ octal-digit{1,3}
	hexadecimal-escape-sequence =
		\x hexadecimal-digit+
string-literal
" s-char* "
s-char =
	any character except ", \, or newline
	escape-sequence

preprocessing-file:
	group-opt
group:
	group-part+
group-part:
	if-section
	control-line
	text-line
	# non-directive
if-section:
	if-group elif-groups-opt else-group-opt endif-line
if-group:
	# if constant-expr newline group-opt
	# ifdef identifier newline group-opt
	# ifndef identifier newline group-opt
elif-groups:
	elif-group+
elif-group:
	# elif constant-expr newline group-opt
else-group:
	# else newline group-opt
endif-line:
	# endif newline
control-line:
	# include pp-tokens newline
	# define identifier replacement-list newline
	# define identifier def-lparen identifier-list-opt ) replacement-list newline
	# define identifier def-lparen ... ) replacement-list newline
	# define identifier def-lparen identifier-list , ... ) replacement-list newline
	# undef identifier newline
	# line pp-tokens newline
	# error pp-tokens newline
	# pragma pp-tokens newline
	# newline
text-line:
	pp-tokens-opt newline
non-directive:
	pp-tokens newline
def-lparen:
	a ( which is not immediately preceded by whitespace
replacement-list:
	pp-tokens-opt
pp-tokens:
	preprocessing-token+

*/