/*
*   $Id: perl.c,v 1.8 2003/04/01 04:55:27 darren Exp $
*
*   Copyright (c) 2000-2003, Darren Hiebert
*
*   This source code is released for free distribution under the terms of the
*   GNU General Public License.
*
*   This module contains functions for generating tags for PERL language
*   files.
*
*   2004/02/02: Cosimo Streppone <cosimo@cpan.org>
*     Extended perl support with constants, globals (declared with our),
*     pragmas, used modules and required modules.
*/

/*
*   INCLUDE FILES
*/
#include "general.h"	/* must always come first */
#include <string.h>
#include "read.h"
#include "vstring.h"

/*
*   DATA DEFINITIONS
*/
typedef enum {
    K_PRAGMA,
    K_USE,
    K_REQUIRE,
    K_CONSTANT,
    K_GLOBAL,
    K_SUBROUTINE,
    K_PACKAGE,
} perlKind;

static kindOption PerlKinds [] = {
    { TRUE, 'm', "pragma",     "pragma declarations"    },
    { TRUE, 'u', "use",        "used modules"           },
    { TRUE, 'r', "require",    "required modules"       },
    { TRUE, 'c', "constant",   "constants"              },
    { TRUE, 'g', "global",     "global variables (our)" },
    { TRUE, 's', "subroutine", "subroutines"            },
    { TRUE, 'p', "package",    "packages"               }
};


/*
 * FUNCTION DEFINITIONS
 */

char* getNextWord( const unsigned char* src, const char* delimiters ) {
    char word[1024] = "";
    char* result;

    /* If next is not space, this is not a use constant statement */
    if(!isspace(*src))
        return NULL;

    /* Skip spaces */
    while(isspace(*src))
        src++;

    /* Get constant name (until '=>' or ',') */
    while(!isspace(*src) && *src != '\0' && strchr(delimiters, (int) *src) == NULL) {
        strncat( word, src, 1 );
        src++;
    }
    /* Terminate word */
    strcat( word, "\0" );
    result = word;
    return result;
}

int parseConstant ( const unsigned char* src ) {
    void* constName = NULL;
    const unsigned char* srcCopy = src;
    int found = 0;

    if( strncmp( (const char*) src, "use constant", (size_t) 12 ) == 0 ) {
	src += 12;                                       /* advance to next word */
	constName = (void*) getNextWord( src, ";=," );   /* probably we found a constant */
	if( constName == NULL ) {
            src = srcCopy;
	} else {
	    found = 1;
            constName = vStringNewInit( constName );
	    makeSimpleTag( constName, PerlKinds, K_CONSTANT );
            vStringDelete( constName );
        }
    }

    return found;
}

int parsePragma ( const unsigned char* src ) {

    boolean use_ahead  = FALSE;             /* tells if there is a "use" keyword ahead */
    boolean no_ahead   = FALSE;             /* tells if there is a "no"  keyword ahead */
    boolean found      = FALSE;

    void* pragmaName = NULL;
    char str[1024];
    const char* pragmaKey  = NULL;
    const unsigned char* srcCopy = src;
    int i = 0;

    /* `use vars' declares globals
     * `use constant' declares constants
     */
    const char* knownPragmas [] = {
	"attributes", "autouse",  "base",      "bigint",      "bignum",
	"bigrat",     "blib",     "bytes",     "charnames",   "diagnostics",
	"fields",     "filetest", "if",        "integer",     "less",
	"locale",     "open",     "overload",  "sigtrap",     "sort",
	"strict",     "subs",     "utf8",      "vars",        "vmsish",
	"warnings",   NULL
    };

    use_ahead = strncmp( (const char*) src, "use", (size_t) 3 ) == 0;
    no_ahead  = strncmp( (const char*) src, "no",  (size_t) 2 ) == 0;

    if( use_ahead || no_ahead ) {
	/* Advance to next word */
	src += use_ahead ? 3 : 2;
	/* Probably we found a pragma declaration */
	pragmaName = (void*) getNextWord( src, ";=" );

	if( pragmaName == NULL ) {
            /* No word after `use' or `no' => probably this is not a pragma */
            src = srcCopy;
            return 0;
	} else {
	    /* Try to recognize one of known pragma keywords */
            while( knownPragmas[i] != NULL ) {
                if( strcmp(knownPragmas[i], (const char*) pragmaName) == 0 ) {
                    /* Found! */
                    found = TRUE;
                    break;
                }
                i++;
            }
	}

    	/* Create a tag if found a pragma */
	if( found ) {
	    /* Add a minus char if this is a `no ....' pragma */
	    if( no_ahead )
		strcat(pragmaName, " -");

            pragmaName = vStringNewInit( pragmaName );
            makeSimpleTag( pragmaName, PerlKinds, K_PRAGMA );
            vStringDelete(pragmaName);

        } else {
	    /* Restore source pointer and exit */
	    src = srcCopy;
	}

    }

    return found;

}

int parseUseRequire( const unsigned char* src ) {

    void* moduleName = NULL;
    const unsigned char* srcCopy = src;
    int found = 0;
    perlKind kind;

    boolean found_use     = strncmp( (const char*) src, "use",     (size_t) 3 ) == 0;
    boolean found_require = strncmp( (const char*) src, "require", (size_t) 7 ) == 0;

    if( found_use || found_require ) {

	if( found_use ) {
	    src += 3; kind = K_USE;
	} else {
	    src += 7; kind = K_REQUIRE;
	}

	moduleName = getNextWord( src, "; \t" );

	if( moduleName == NULL ) {
	    /* Restore previous position in source file */
	    src = srcCopy;
	} else {
	    found = 1;
            moduleName = vStringNewInit(moduleName);
	    makeSimpleTag( moduleName, PerlKinds, kind );
	    vStringDelete(moduleName);
	}
    }

    return found;
}


int parseGlobal( const unsigned char* src ) {

    void* varName = NULL;
    const unsigned char* srcCopy = src;
    int found = 0;

    if (strncmp((const char*) src, "our", (size_t) 3) == 0) {
	/* Probably we found a global */
	src += 3;
	varName = getNextWord( src, ";=" );
	if( varName == NULL ) {
	    src = srcCopy;
	} else {
	    found = 1;
	    varName = vStringNewInit(varName); 
	    makeSimpleTag( varName, PerlKinds, K_GLOBAL );
	    vStringDelete(varName);
	}
    } 

    return found;
}

int parseSub ( const unsigned char* src ) {
    void* subName = NULL;
    const unsigned char* srcCopy = src;
    int found = 0;
    perlKind kind;

    boolean found_sub = (boolean) (strncmp((const char*) src, "sub",     (size_t) 3) == 0);
    boolean found_pkg = (boolean) (strncmp((const char*) src, "package", (size_t) 7) == 0);

    if( found_sub || found_pkg ) {

        if( found_sub ) {
	    src += 3;  kind = K_SUBROUTINE;
	} else {
	    src += 7;  kind = K_PACKAGE;
	}

	subName = getNextWord( src, "{(;" );

	if( subName == NULL ) {
	    /* Restore previous position in source file */
	    src = srcCopy;
	} else {
	    found = 1;
            subName = vStringNewInit(subName);
	    makeSimpleTag(subName, PerlKinds, kind);
	    vStringDelete(subName);
	}
    }

    return found;
}



/* Algorithm adapted from from GNU etags.
 * Perl support by Bart Robinson <lomew@cs.utah.edu>
 * Perl sub names: look for /^ [ \t\n]sub [ \t\n]+ [^ \t\n{ (]+/
 */
static void findPerlTags (void)
{
    boolean skipPodDoc = FALSE;
    const unsigned char *line;
    perlKind kind;

    while ((line = fileReadLine ()) != NULL) {

	const unsigned char *cp = line;

	/* Handle all "start-of-line" strings */
	if (skipPodDoc) {
	    if (strncmp ((const char*) line, "=cut", (size_t) 4) == 0)
		skipPodDoc = FALSE;
	    continue;
	}
	else if (line [0] == '=') {
	    skipPodDoc = (boolean) (strncmp (
			(const char*) line + 1, "cut", (size_t) 3) != 0);
	    continue;
	}
	
	/* Skip comments */
	else if (line [0] == '#')
	    continue;
	/* __DATA__ or __END__ end the source code */
	else if (strcmp ((const char*) line, "__DATA__") == 0)
	    break;
	else if (strcmp ((const char*) line, "__END__") == 0)
	    break;

	while (isspace (*cp))
	    cp++;

	if( parseConstant(cp) )
	    continue;

	if( parsePragma(cp) )
	    continue;

	if( parseUseRequire(cp) )
	    continue;

	if( parseGlobal(cp) ) 
	    continue;

	if( parseSub(cp) )
	    continue;

    }

}

extern parserDefinition* PerlParser (void)
{
    static const char *const extensions [] = { "pl", "pm", "plx", "perl", "PL", NULL };
    parserDefinition* def = parserNew ("Perl");
    def->kinds      = PerlKinds;
    def->kindCount  = KIND_COUNT (PerlKinds);
    def->extensions = extensions;
    def->parser     = findPerlTags;
    return def;
}

/* vim: set nu nowrap et ts=4 sw=4: */

