文章标题

本文介绍了一个简单词法分析器的实现过程:以状态机的方式处理输入流,识别标识符、数字、运算符等各类词法单元,并对注释(行注释与块注释)等特殊符号做了专门处理。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

int get_token()
{
    int c, len, i, max_word_len, state, token, flag, pause_line, pause_block;

    state = START;
    pause_line = 0;
    pause_block = 0;

    while( state != OVER  )
    {
        if( state == PAUSE )
        {
            if( pause_line )
            {
                pause_line = 0;
                pos = input_len;
                state = START;
            }
            if( pause_block )
            {
                while( (c = get_next_word(&pos)) != FINISH )
                {
                    if( c == '*' && input_line[pos] == '/' )
                    {
                        input_line[pos++];
                        break;
                    }
                }
                if( c == FINISH )
                    return FINISH;
                pause_block = 0;
                state = START;
            }
        }
        c = get_next_word(&pos);
        if( c == FINISH )
            return FINISH;
        if( c == ' ' || c == '\t' || c == '\n' )
            continue;
        if( isalpha( c ) || c == '_' )
        {
            i = 0;
            max_word_len = 0;
            do
            {
                save_word[i++] = c;
                c = input_line[pos++];
                max_word_len++;
            }while( isdigit( c ) || c == '_' || isalpha( c ) );
            pos--;
            if( max_word_len >= 20 )
            {
                fprintf( stderr, "word's max len is 20"); // do a error deal function
                exit( 0 );
            }
            save_word[i] = '\0';
            token = TOK_ID;
            state = OVER;
        }
        else if( isdigit( c ) )
        {
            i = 0;
            if( flag == '-' )
                save_word[i++] = '-';
            while( isdigit( c ) )
            {
                save_word[i++] = c;
                c = input_line[pos++];
            }
            pos--;
            save_word[i] = '\0';
            save_num = atoi( save_word );
            token = TOK_NUM;
            state = OVER;
            flag = 0;
        }
        else
        {
            state = OVER;
            switch( c )
            {
                case '+':
                    token = TOK_PLUS;
                    break;
                case '-':
                    token = TOK_MINUS;
                    break;
                case '*':
                    token = TOK_MUL;
                    break;
                case '/':
                    if( input_line[pos] == '/' )
                    {
                        input_line[pos++];
                        state = PAUSE;
                        pause_line = 1;
                    }
                    else if( input_line[pos] == '*' )
                    {
                        input_line[pos++];
                        state = PAUSE;
                        pause_block = 1;
                    }
                    else
                    {
                        token = TOK_DIV;
                    }
                    break;
                case '=':
                    if( input_line[pos] == '=' )
                    {
                        input_line[pos++];
                        token = TOK_EQ;
                    }
                    else
                        token = TOK_ASSIGN;
                    break;
                case '>':
                    if( input_line[pos] == '=' )
                    {
                        input_line[pos++];
                        token = TOK_GE;
                    }
                    else
                        token = TOK_GT;
                    break;
                case '<':
                    if( input_line[pos] == '=' )
                    {
                        input_line[pos++];
                        token = TOK_LE;
                    }
                    else
                        token = TOK_LT;
                    break;
                case '!':
                    if( input_line[pos] == '=' )
                    {
                        input_line[pos++];
                        token = TOK_NE;
                    }
                    else
                        token = TOK_NOT;
                    break;
                case '&':
                    if( input_line[pos] == '&' )
                    {
                        input_line[pos++];
                        token = TOK_ANDAND;
                    }
                    else
                        token = TOK_AND;
                    break;
                case '|':
                    if( input_line[pos] == '|' )
                    {
                        input_line[pos++];
                        token = TOK_OROR;
                    }
                    else
                        token = TOK_OR;
                    break;
                case ',':
                    token = TOK_COMMA;
                    break;
                case ':'://no gonna use 'condition ?  true : false'
                    token = TOK_COLON;
                    break;
                case ';':
                    token = TOK_SEMI;
                    break;
                case 39:    //  '
                    token = TOK_SQUTOA;
                    break;
                case 34:    //  "
                    token = TOK_DQUTOA;
                    break;
                case '(':
                    token = TOK_LPAREN;
                    break;
                case ')':
                    token = TOK_RPAREN;
                    break;
                case '[':
                    token = TOK_LSQUARE;
                    break;
                case ']':
                    token = TOK_RSQUARE;
                    break;
                case '{':
                    token = TOK_LBRACKET;
                    break;
                case '}':
                    token = TOK_RBRACKET;
                    break;
            }
        }
    }
    if( token == TOK_ID )
    {
        int t;
        if( (t = is_resever_word(save_word)) != -1 )
            token = t;
    }
    return token;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值