%{
/* xid.l - extract identifiers from c source		Steve Simon		2005 */
#include <bio.h>

#undef YYLMAX
#define YYLMAX	(1024 * 16)

#define FUNC_CALL	1
#define FUNC_DEF	2

/*
 * The most recent function-like name matched by the scanner.
 * keepfun() fills every field in; outfun() writes the record to the
 * symbol index and then clears `line`.
 */
struct {			/* last function name found */
	int line;		/* line the name was seen on; 0 means nothing is pending */
	int fnum;		/* value of the global fnum when the name was seen */
	int brace;		/* value of the global brace when the name was seen */
	char *name;		/* heap copy of the name, replaced on each keepfun() call */
} func;

/* option flags, set from the command line in main() */
int	nolvars = 0;		/* don't extract info from variables (-v); checked by varuse() */
int	nocomments = 0;		/* don't extract info from comments (-c) */

/* output indexes and scanner state shared by the rules and helpers */
Biobuf	*symb;			/* symbols index */
Biobuf	*file;			/* filenames index */
int	fnum = 0;		/* file "number"; bumped by xid() for each file it opens */
int	incomment = 0;		/* inside a comment; only reset by xid() in the code visible here */
int	line = 0;		/* current line number; xid() restarts it at 1 for each file */
int	brace = 0;		/* nesting level of braces */
int	gotid = 0;		/* have a buffered identifier; not referenced elsewhere in this file */
char	*infile = "stdin";	/* current file name, used in diagnostics */
char	*prefix = NULL;		/* prefix to remove from filenames (-p) */

int yywrap(void);
void vardef(void);
void varuse(void);
void charconst(void);
void keepfun(void);
void outfun(int);
void outdef(void);
void getstring(void);
void getcomment(void);
void xid(Biobuf *);

%}
%p 3000

letter		[A-Za-z_]
digit		[0-9]
ws		[ \t]

%%


const		|
volatile	|
void		|
int		|
char		|
float		|
double		|
struct		|
union		|
long		|
short		|
unsigned	|
auto		|
extern		|
register	|
typedef		|
static		|
goto		|
return{ws}*\(	|
return		|
sizeof		|
sizeof{ws}*\(	|
break		|
continue       	|
if{ws}*\(	|
else		|
for{ws}*\(	|
do		|
while{ws}*\(	|
switch{ws}*\(	|
case		|
default		|
entry		|
enum		|
define		|
undef		|
ifdef		|
ifndef		|
include		|
defined{ws}*\(	|
endif		{ ; }		/* keywords and preprocessor words: matched and discarded */


"\"" { getstring(); }		/* string literal: getstring() consumes and records it */

"\n" { line++; }		/* keep the line counter current */

{letter}({letter}|{digit})*{ws}*\( { keepfun(); }		/* function name followed by an open bracket */

";" { outfun(';'); }		/* flush a pending function name */

"{" { outfun('{'); brace++; }		/* flush a pending function name, then go one level deeper */

"}" { brace--; }		/* leave one brace level */

"/*" { getcomment(); }		/* comment: getcomment() consumes it */

^#{ws}*line.*$ ; 			/* #line: ignored */

^#{ws}*include.*$ ; 			/* #include: ignored */

^#{ws}*define{ws}+{letter}({letter}|{digit})* { outdef(); } 	/* #define: record the macro name */

{letter}({letter}|{digit})*{ws}+{letter}({letter}|{digit})*{ws}*[;,] { vardef(); }		/* variable definition: two names then ; or , */

{letter}({letter}|{digit})* { varuse(); }						/* variable use: any other identifier */

'[^\\']'         |
'\\{digit}{1,3}' |
'\\[\\bfrnlt"']' { charconst(); }; 	/* character constant: plain, octal or escaped */

. ;					/* delete everything else */

%%

/*
 * yywrap - end-of-input hook for the generated scanner.
 * Always returns 1, i.e. there is no further input to continue with.
 */
int
yywrap(void)
{
	return 1;
}


/*
 * vardef - record a variable definition.
 * Writes the whole matched text (yytext) to the symbol index, tagged
 * "l" when inside braces and "g" when at brace level 0.
 */
void
vardef(void)
{
	if (brace != 0) {
		Bprint(symb, "l %d %d %s\n", fnum, line, yytext);
		return;
	}
	Bprint(symb, "g %d %d %s\n", fnum, line, yytext);
}


/*
 * varuse - record the use of an identifier as a "v" entry in the
 * symbol index.  Does nothing when nolvars is set.
 */
void
varuse(void)
{
	if (nolvars)
		return;
	Bprint(symb, "v %d %d %s\n", fnum, line, yytext);
}


/*
 * charconst - record a character constant, quotes included, as a "c"
 * entry in the symbol index.
 */
void
charconst(void)
{
	char *lit;

	lit = yytext;
	Bprint(symb, "c %d %d %s\n", fnum, line, lit);
}


/*
 * keepfun - remember a function name (definition, prototype or call).
 *
 * yytext holds "name", optional blanks/tabs, then '('.  The bracket and
 * any whitespace in front of it are cut off so that only the bare name
 * is stored; without that, "foo (" would be indexed as "foo ".
 * The name is kept in func together with the current file number, line
 * and brace level until outfun() writes it out.
 */
void
keepfun(void)
{
	int i;

	i = strlen(yytext);
	if (i > 1)				/* trim bracket */
		yytext[--i] = 0;
	while (i > 0 && (yytext[i - 1] == ' ' || yytext[i - 1] == '\t'))
		yytext[--i] = 0;		/* trim blanks between name and bracket */

	func.fnum = fnum;
	func.line = line;
	func.brace = brace;

	free(func.name);			/* free(nil) is a no-op */
	func.name = strdup(yytext);
	if (func.name == nil)
		sysfatal("no memory");
}


/*
 * outfun - write out the function name remembered by keepfun(), if any.
 *
 * c is the character that triggered the flush (';' or '{').
 *   inside braces            -> "f" (call)
 *   brace level 0, c == '{'  -> "F" (definition)
 *   brace level 0, otherwise -> "P" (prototype)
 * func.line is then zeroed so the same name is not written twice.
 */
void
outfun(int c)
{
	if (func.line != 0) {
		if (brace == 0 && c == '{')
			Bprint(symb, "F %d %d %s\n", func.fnum, func.line, func.name);
		else if (brace == 0)
			Bprint(symb, "P %d %d %s\n", func.fnum, func.line, func.name);
		else
			Bprint(symb, "f %d %d %s\n", func.fnum, func.line, func.name);

		func.line = 0;
	}
}


/*
 * outdef - record a #define as a "d" entry in the symbol index.
 *
 * yytext holds the directive up to and including the macro name; the
 * name is whatever follows the last blank or tab.
 */
void
outdef(void)
{
	int len;
	char *name;

	len = strlen(yytext);
	if (len <= 1)
		return;

	/* walk back from the last character to the whitespace before the name */
	name = yytext + len - 1;
	while (*name != ' ' && *name != '\t' && *name != '\0')
		name--;

	Bprint(symb, "d %d %d %s\n", fnum, line, name + 1);
}


/*
 * getstring - consume a string literal (the opening quote has already
 * been matched) and write its text as an "s" entry in the symbol index.
 *
 * Whitespace characters are turned into blanks, leading whitespace is
 * dropped and runs of blanks are squeezed to one.  The string ends at
 * the first quote not preceded by a backslash, or at end of input.
 */
void
getstring(void)
{
	char ch;
	int prev;		/* previous character, 0 at the start of the string */

	prev = 0;
	Bprint(symb, "s %d %d ", fnum, line);
	for (;;) {
		ch = input();
		if (ch == 0) {
			fprint(2, "%s:%d EOF in string\n", infile, line);
			break;
		}
		if (ch == '"' && prev != '\\')
			break;			/* unescaped quote closes the string */

		if (ch == '\n')
			line++;
		if (ch == '\n' || ch == '\r' || ch == '\b' || ch == '\f'
		|| ch == '\t' || ch == ' ') {
			if (prev == 0)
				continue;	/* skip leading whitespace */
			ch = ' ';
		}

		if (!(prev == ' ' && ch == ' '))
			Bputc(symb, ch);

		/*
		 * After an escaped backslash remember a dummy character, so
		 * that a quote following \\ is still seen as the terminator.
		 */
		if (prev == '\\' && ch == '\\')
			prev = '#';
		else
			prev = ch;
	}

	Bputc(symb, '\n');
}


void
getcomment(void)
{
	char c, p = 0;

	Bprint(symb, "x %d %d ", fnum, line);
	while (1) {
		switch (c = input()) {
		case '\n':
			line++;
		case '\r':
		case '\b':
		case '\f':
		case '\t':
		case ' ' :
			if (p == 0)
				continue;
			c = ' ';
			break;
		case 0:
			fprint(2, "%s:%d EOF in comment\n", infile, line);
			goto fini;
		case '*':
			p = c;
			continue;
		case '/':
			if (p == '*')
				goto fini;
			break;
		default:
			break;
		}
		if (p == '*')			/* catchup delayed '*' */
			Bputc(symb, p);
		if (!(p == ' ' && c == ' '))	/* colapse multiple spaces into one */
			Bputc(symb, c);
		p = c;
	}

fini:
	Bputc(symb, '\n');
}



/*
 * xid - scan every file named on bi, one file name per line.
 *
 * For each file that can be opened, a "<number> <name>" line is added
 * to the file index (with prefix removed from the front of the name
 * when it matches), the per-file scanner state is reset and yylex() is
 * run.  Files that cannot be opened are reported on fd 2 and skipped
 * without using up a file number.
 *
 * NOTE(review): infd is not declared in this file; it is presumably the
 * descriptor the scanner reads from -- confirm against the lex driver.
 */
void
xid(Biobuf *bi)
{
	int l;
	char *p, *buf;

	l = (prefix)? strlen(prefix): 0;

	while ((buf = Brdline(bi, '\n')) != NULL) {
		buf[Blinelen(bi)-1] = 0;	/* replace the newline delimiter */
		if ((p = strchr(buf, '\n')) != NULL)
			*p = 0;
		/* open() reports failure with -1, not 0; fd 0 is a valid result */
		if ((infd = open(buf, OREAD)) < 0) {
			fprint(2, "%s can't open\n", buf);
			continue;
		}
		if (prefix && strncmp(buf, prefix, l) == 0)
			Bprint(file, "%d %s\n", ++fnum, buf+l);
		else
			Bprint(file, "%d %s\n", ++fnum, buf);

		/* fresh scanner state for this file; buf stays valid until the next Brdline */
		infile = buf;
		line = 1;
		incomment = 0;
		brace = 0;

		yylex();
		close(infd);
	}

}


/*
 * usage - print the command synopsis on fd 2 and exit with status
 * "usage".
 */
void
usage(void)
{
	char *prog;

	prog = argv0;
	fprint(2, "usage: %s [-vc] [-p path] [index-base]\n", prog);
	exits("usage");
}


/*
 * main - parse the options, open the two index files and scan every
 * file named on standard input.
 *
 *   -c       sets nocomments
 *   -v       sets nolvars
 *   -p path  prefix to strip from file names in the file index
 *
 * With no argument the indexes are files.idx and symbs.idx in the
 * current directory; with one argument they are created under that
 * directory.  More than one argument is a usage error.
 */
void
main(int argc, char *argv[])
{
	Biobuf bin;
	char *fi, *sy;

	ARGBEGIN {
	case 'p':
		prefix = EARGF(usage());
		break;
	case 'v':
		nolvars = 1;
		break;
	case 'c':
		nocomments = 1;
		break;
	default:
		usage();
	}ARGEND;

	if (argc > 1)
		usage();

	if (argc != 0) {
		fi = smprint("%s/files.idx", argv[0]);
		sy = smprint("%s/symbs.idx", argv[0]);
	} else {
		fi = "files.idx";
		sy = "symbs.idx";
	}
	if (fi == nil || sy == nil)
		sysfatal("no memory");

	file = Bopen(fi, OWRITE);
	if (file == NULL)
		sysfatal("%s can't open for writing", fi);
	symb = Bopen(sy, OWRITE);
	if (symb == NULL)
		sysfatal("%s can't open for writing", sy);

	/* the list of files to scan arrives on fd 0 */
	Binit(&bin, 0, OREAD);
	xid(&bin);

	Bterm(symb);
	Bterm(file);
	exits(0);
}