当前位置: 移动技术网 > IT编程>开发语言>.net > 笔记 - C#从头开始构建编译器 - 3

笔记 - C#从头开始构建编译器 - 3

2019年05月10日  | 移动技术网IT编程  | 我要评论

视频与 PR: https://github.com/terrajobst/minsk/blob/master/docs/episode-03.md

作者是 Immo Landwerth(https://twitter.com/terrajobst),微软 .NET 团队的项目经理。

 

这一集前半段主要是重构代码,后半段的主要内容:

1. 变量与赋值表达式

2. 加强诊断信息

 

parser 非常清晰

using system.collections.generic;

namespace minsk.codeanalysis.syntax
{
    /// <summary>
    /// Turns source text into a <c>syntaxtree</c>: the constructor lexes the whole
    /// input up front, and <c>parse</c> then walks the resulting token array.
    /// </summary>
    internal sealed class parser
    {
        private readonly syntaxtoken[] _tokens;
        private int _position;
        // Assigned only by this initializer, so it is marked readonly
        // (matching the equivalent field in the binder).
        private readonly diagnosticbag _diagnostics = new diagnosticbag();

        /// <summary>
        /// Lexes <paramref name="text"/> completely and keeps every token except
        /// whitespace and bad tokens. The end-of-file token is kept, so it is always
        /// the last element of the token array. Diagnostics produced by the lexer are
        /// copied into this parser's diagnostics.
        /// </summary>
        /// <param name="text">The source text to parse.</param>
        public parser(string text)
        {
            var tokens = new list<syntaxtoken>();

            var lexer = new lexer(text);
            syntaxtoken token;
            do
            {
                token = lexer.lex();
                if (token.kind != syntaxkind.whitespacetoken && token.kind != syntaxkind.badtoken)
                    tokens.add(token);
            } while (token.kind != syntaxkind.endoffiletoken);

            _tokens = tokens.toarray();
            _diagnostics.addrange(lexer.diagnostics);
        }

        /// <summary>Diagnostics collected while lexing and parsing.</summary>
        public diagnosticbag diagnostics => _diagnostics;

        /// <summary>
        /// Returns the token <paramref name="offset"/> positions ahead of the current
        /// one without consuming anything.
        /// </summary>
        /// <param name="offset">Look-ahead distance; 0 is the current token.</param>
        private syntaxtoken peek(int offset)
        {
            var index = _position + offset;
            // Looking past the end yields the last token (the end-of-file token)
            // instead of running off the array.
            if (index >= _tokens.length)
                return _tokens[_tokens.length - 1];
            return _tokens[index];
        }

        /// <summary>The token at the current position.</summary>
        private syntaxtoken current => peek(0);

        /// <summary>Returns the current token and advances the position by one.</summary>
        private syntaxtoken nexttoken()
        {
            var token = current;
            _position++;
            return token;
        }

        /// <summary>
        /// Consumes and returns the current token when it has the expected
        /// <paramref name="kind"/>. Otherwise reports an unexpected-token diagnostic
        /// and returns a newly created token of the expected kind.
        /// </summary>
        /// <param name="kind">The token kind required at this point.</param>
        private syntaxtoken matchtoken(syntaxkind kind)
        {
            if (current.kind == kind)
                return nexttoken();

            _diagnostics.reportunexpectedtoken(current.span, current.kind, kind);
            // The position is not advanced here: the mismatched token stays current
            // while the caller receives a placeholder of the expected kind.
            return new syntaxtoken(kind, current.position, null, null);
        }

        /// <summary>
        /// Parses one expression followed by the end-of-file token and wraps both,
        /// together with the collected diagnostics, in a <c>syntaxtree</c>.
        /// </summary>
        public syntaxtree parse()
        {
            var expression = parseexpression();
            var endoffiletoken = matchtoken(syntaxkind.endoffiletoken);
            return new syntaxtree(_diagnostics, expression, endoffiletoken);
        }

        /// <summary>Entry point for expressions; delegates to assignment parsing.</summary>
        private expressionsyntax parseexpression()
        {
            return parseassignmentexpression();
        }

        /// <summary>
        /// Parses <c>identifier = expression</c> when the next two tokens are an
        /// identifier followed by an equals token; otherwise parses a binary expression.
        /// </summary>
        private expressionsyntax parseassignmentexpression()
        {
            if (peek(0).kind == syntaxkind.identifiertoken && peek(1).kind == syntaxkind.equalstoken)
            {
                var identifiertoken = nexttoken();
                var equalstoken = nexttoken();
                // Recursing on the right-hand side makes assignment right-associative:
                // a = b = 1 parses as a = (b = 1).
                var right = parseassignmentexpression();
                return new assignmentexpressionsyntax(identifiertoken, equalstoken, right);
            }

            return parsebinaryexpression();
        }

        /// <summary>
        /// Parses unary and binary operator expressions, using operator precedence to
        /// decide how operands group.
        /// </summary>
        /// <param name="parentprecedence">
        /// Precedence of the operator that owns the operand being parsed; 0 at the top level.
        /// </param>
        private expressionsyntax parsebinaryexpression(int parentprecedence = 0)
        {
            expressionsyntax left;
            var unaryoperatorprecedence = current.kind.getunaryoperatorprecedence();
            // A precedence of 0 is treated as "current token is not a unary operator".
            if (unaryoperatorprecedence != 0 && unaryoperatorprecedence >= parentprecedence)
            {
                var operatortoken = nexttoken();
                var operand = parsebinaryexpression(unaryoperatorprecedence);
                left = new unaryexpressionsyntax(operatortoken, operand);
            }
            else
                left = parseprimaryexpression();

            while (true)
            {
                var precedence = current.kind.getbinaryoperatorprecedence();
                // Stop on a non-operator (0) or on an operator that does not bind
                // tighter than the parent; "<=" makes equal-precedence operators
                // group to the left.
                if (precedence == 0 || precedence <= parentprecedence)
                    break;

                var operatortoken = nexttoken();
                var right = parsebinaryexpression(precedence);
                left = new binaryexpressionsyntax(left, operatortoken, right);
            }

            return left;
        }

        /// <summary>
        /// Parses a parenthesized expression, a <c>true</c>/<c>false</c> literal, a
        /// name, or (by default) a number literal.
        /// </summary>
        private expressionsyntax parseprimaryexpression()
        {
            switch (current.kind)
            {
                case syntaxkind.openparenthesistoken:
                {
                    var left = nexttoken();
                    var expression = parseexpression();
                    var right = matchtoken(syntaxkind.closeparenthesistoken);
                    return new parenthesizedexpressionsyntax(left, expression, right);
                }

                case syntaxkind.truekeyword:
                case syntaxkind.falsekeyword:
                {
                    var keywordtoken = nexttoken();
                    // The literal's value is true only for the true keyword.
                    var value = keywordtoken.kind == syntaxkind.truekeyword;
                    return new literalexpressionsyntax(keywordtoken, value);
                }

                case syntaxkind.identifiertoken:
                {
                    var identifiertoken = nexttoken();
                    return new nameexpressionsyntax(identifiertoken);
                }

                default:
                {
                    // Anything else must be a number; matchtoken reports a
                    // diagnostic when it is not.
                    var numbertoken = matchtoken(syntaxkind.numbertoken);
                    return new literalexpressionsyntax(numbertoken);
                }
            }
        }
    }
}

作为语义分析的 binder 也非常清晰

using system;
using system.collections.generic;
using system.linq;
using minsk.codeanalysis.syntax;

namespace minsk.codeanalysis.binding
{
    /// <summary>
    /// Converts syntax nodes into bound expressions, collecting diagnostics for
    /// undefined names and undefined operators along the way.
    /// </summary>
    internal sealed class binder
    {
        private readonly diagnosticbag _diagnostics = new diagnosticbag();
        private readonly dictionary<variablesymbol, object> _variables;

        /// <summary>Creates a binder that works on the caller-supplied variable table.</summary>
        /// <param name="variables">
        /// Known variables and their values; assignment binding adds to and removes
        /// from this dictionary.
        /// </param>
        public binder(dictionary<variablesymbol, object> variables)
        {
            _variables = variables;
        }

        /// <summary>Diagnostics reported while binding.</summary>
        public diagnosticbag diagnostics => _diagnostics;

        /// <summary>
        /// Dispatches on the node's kind to the matching bind method.
        /// </summary>
        /// <param name="syntax">The expression node to bind.</param>
        /// <returns>The bound form of <paramref name="syntax"/>.</returns>
        public boundexpression bindexpression(expressionsyntax syntax)
        {
            var kind = syntax.kind;

            if (kind == syntaxkind.parenthesizedexpression)
                return bindparenthesizedexpression((parenthesizedexpressionsyntax)syntax);
            if (kind == syntaxkind.literalexpression)
                return bindliteralexpression((literalexpressionsyntax)syntax);
            if (kind == syntaxkind.nameexpression)
                return bindnameexpression((nameexpressionsyntax)syntax);
            if (kind == syntaxkind.assignmentexpression)
                return bindassignmentexpression((assignmentexpressionsyntax)syntax);
            if (kind == syntaxkind.unaryexpression)
                return bindunaryexpression((unaryexpressionsyntax)syntax);
            if (kind == syntaxkind.binaryexpression)
                return bindbinaryexpression((binaryexpressionsyntax)syntax);

            // No bind method exists for any other node kind.
            throw new exception($"unexpected syntax {syntax.kind}");
        }

        /// <summary>Parentheses add no bound node; only the inner expression is bound.</summary>
        private boundexpression bindparenthesizedexpression(parenthesizedexpressionsyntax syntax)
            => bindexpression(syntax.expression);

        /// <summary>Binds a literal, substituting 0 when the node carries no value.</summary>
        private boundexpression bindliteralexpression(literalexpressionsyntax syntax)
            => new boundliteralexpression(syntax.value ?? 0);

        /// <summary>
        /// Resolves a name against the variable table. An unknown name is reported
        /// and bound as the literal 0.
        /// </summary>
        private boundexpression bindnameexpression(nameexpressionsyntax syntax)
        {
            var identifier = syntax.identifiertoken.text;

            // Variables are keyed by symbol, so a name lookup scans the keys.
            var symbol = _variables.keys.firstordefault(candidate => candidate.name == identifier);
            if (symbol != null)
                return new boundvariableexpression(symbol);

            _diagnostics.reportundefinedname(syntax.identifiertoken.span, identifier);
            return new boundliteralexpression(0);
        }

        /// <summary>
        /// Binds the right-hand side, then replaces any variable of the same name
        /// with a new symbol typed after the bound expression.
        /// </summary>
        private boundexpression bindassignmentexpression(assignmentexpressionsyntax syntax)
        {
            var identifier = syntax.identifiertoken.text;
            var boundvalue = bindexpression(syntax.expression);

            // Drop a previous declaration of the same name before adding the new symbol.
            var previous = _variables.keys.firstordefault(candidate => candidate.name == identifier);
            if (previous != null)
                _variables.remove(previous);

            var symbol = new variablesymbol(identifier, boundvalue.type);
            // Only the symbol is registered here; its stored value starts as null.
            _variables[symbol] = null;

            return new boundassignmentexpression(symbol, boundvalue);
        }

        /// <summary>
        /// Binds a unary expression. When no operator matches the operand type, the
        /// problem is reported and the bound operand is returned on its own.
        /// </summary>
        private boundexpression bindunaryexpression(unaryexpressionsyntax syntax)
        {
            var operand = bindexpression(syntax.operand);
            var op = boundunaryoperator.bind(syntax.operatortoken.kind, operand.type);
            if (op != null)
                return new boundunaryexpression(op, operand);

            _diagnostics.reportundefinedunaryoperator(syntax.operatortoken.span, syntax.operatortoken.text, operand.type);
            return operand;
        }

        /// <summary>
        /// Binds a binary expression. When no operator matches the operand types, the
        /// problem is reported and the bound left operand is returned on its own.
        /// </summary>
        private boundexpression bindbinaryexpression(binaryexpressionsyntax syntax)
        {
            var lhs = bindexpression(syntax.left);
            var rhs = bindexpression(syntax.right);
            var op = boundbinaryoperator.bind(syntax.operatortoken.kind, lhs.type, rhs.type);
            if (op != null)
                return new boundbinaryexpression(lhs, op, rhs);

            _diagnostics.reportundefinedbinaryoperator(syntax.operatortoken.span, syntax.operatortoken.text, lhs.type, rhs.type);
            return lhs;
        }
    }
}

 

C# 语言点:

// Declaration excerpt quoted for illustration: only the signature is shown, the body is omitted.
public static class enumerable
{
    // Extension method on a sequence: takes the source plus a predicate (tsource -> bool)
    // and returns a single tsource.
    // NOTE(review): per the .NET documentation this is the first element matching the
    // predicate, or default(tsource) when nothing matches — confirm against the reference.
    public static tsource firstordefault<tsource>(this ienumerable<tsource> source, func<tsource, bool> predicate);
}

FirstOrDefault 可以接受一个谓词(predicate)作为判断条件:返回序列中第一个满足条件的元素,若没有满足条件的元素则返回该类型的默认值。Binder 的第 55 行和第 70 行都使用 lambda 表达式作为这个谓词,按名称查找变量。

 

如对本文有疑问, 点击进行留言回复!!

相关文章:

验证码:
移动技术网