C++ 中的布尔表达式(语法)解析器

2021-12-12 00:00:00 parsing c++ boost-spirit

我想解析一个布尔表达式(在 C++ 中).输入形式:

a and b xor (c and d or a and b);

我只想把这个表达式解析成一棵树,知道优先规则(not,and,xor,or).所以上面的表达式应该是这样的:

(a and b) xor ((c and d) or (a and b));

到解析器.

树的形式如下:

 一个和乙或者C和d异或一个和乙

输入将通过命令行或字符串的形式.我只需要解析器.

是否有任何来源可以帮助我做到这一点?

解决方案

这里是一个基于 Boost Spirit 的实现.

因为 Boost Spirit 根据表达式模板生成递归下降解析器,所以遵守特殊"(原文如此)优先规则(正如其他人提到的)非常乏味.因此语法缺乏一定的优雅.

抽象数据类型

我使用Boost Variant的递归变体支持定义了一个树数据结构,注意expr的定义:

struct op_or {};//标签结构 op_and {};//标签结构 op_xor {};//标签结构 op_not {};//标签typedef std::string var;模板 结构体二进制;模板 结构项目厅;typedef boost::variant<var,boost::recursive_wrapper<unop <op_not>>,boost::recursive_wrapper<binop<op_and>>,boost::recursive_wrapper>,boost::recursive_wrapper<binop<op_or>>>表达式;

(完整来源如下)

语法规则

以下是(略显乏味的)语法定义,如上所述.

虽然我不认为这种语法是最优的,但它的可读性很强,而且我们在大约 50 行代码中拥有一个静态编译的解析器具有强类型 AST 数据类型.情况可能会更糟.

template 结构解析器:qi::grammar{parser() : parser::base_type(expr_){使用命名空间qi;expr_ = or_.alias();not_ = ("not" > simple ) [ _val = phx::construct>(_1) ] |简单 [_val = _1];#ifdef RIGHT_ASSOCIATIVEor_ = (xor_ >> "or" >> or_ ) [ _val = phx::construct<binop<op_or >>(_1, _2)] |xor_ [ _val = _1 ];xor_ = (and_ >> "xor" >> xor_) [ _val = phx::construct<binop<op_xor>>(_1, _2)] |和_ [ _val = _1 ];and_ = (not_ >> "and" >> and_) [ _val = phx::construct<binop<op_and>>(_1, _2)] |不_ [ _val = _1 ];#别的or_ = xor_ [ _val = _1 ] >>*(或">>xor_[_val = phx::construct>(_val,_1)]);xor_ = and_ [ _val = _1 ] >>*("xor" >> and_ [ _val = phx::construct<binop<op_xor>>(_val, _1) ]);and_ = not_ [ _val = _1 ] >>*("and" >> not_ [ _val = phx::construct<binop<op_and>>(_val, _1) ]);#万一简单 = (('(' > expr_ > ')') | var_);var_ = qi::lexeme[ +alpha ];}私人的:qi::rule变量_;qi::rulenot_, and_, xor_, or_, simple, expr_;};

<块引用>

注意:我已经留下了在二元运算符中导致右结合的旧规则定义以供参考,但左结合更自然,因此默认采用

在语法树上操作

显然,您需要对表达式求值.现在,我决定停止只是打印,所以我不必为命名变量做查找表:)

遍历递归变体起初可能看起来很神秘,但是一旦掌握了boost::static_visitor<> 就会出奇的简单:

struct 打印机:boost::static_visitor{打印机(std::ostream& os):_os(os){}std::ostream&_os;//void operator()(const var& v) const { _os <<v;}void operator()(const binop<op_and>& b) const { print(" & ", b.oper1, b.oper2);}void operator()(const binop<op_or >& b) const { print(" | ", b.oper1, b.oper2);}void operator()(const binop<op_xor>& b) const { print(" ^ ", b.oper1, b.oper2);}void print(const std::string& op, const expr& l, const expr& r) const{_os<<"(";boost::apply_visitor(*this, l);_os<<操作;boost::apply_visitor(*this, r);_os<<")";}void operator()(const unop<op_not>& u) const{_os<<"(";_os<<!";boost::apply_visitor(*this, u.oper1);_os<<")";}};std::ostream&运算符<<(std::ostream& os, const expr& e){ boost::apply_visitor(printer(os), e);返回操作系统;}

测试输出:

对于代码中的测试用例,以下是输出,通过添加(冗余)括号演示正确优先规则的处理:

生活在 Coliru

result: ((a & b) ^ ((c & d) | (a & b)))结果:((a & b) ^ ((c & d) | (a & b)))结果:(a & b)结果:(a | b)结果:(a ^ b)结果:(!a)结果:((!a) & b)结果:(!(a & b))结果:((a | b) | c)

<块引用>

注意,与 Live On Coliru 相比,使用 -DRIGHT_ASSOCIATIVE

完整代码:

生活在 Coliru

#include #include #include #include 命名空间 qi = boost::spirit::qi;命名空间 phx = boost::phoenix;结构 op_or {};结构 op_and {};结构 op_xor {};结构 op_not {};typedef std::string var;模板 结构体二进制;模板 结构项目厅;typedef boost::variant<var,boost::recursive_wrapper<unop <op_not>>,boost::recursive_wrapper<binop<op_and>>,boost::recursive_wrapper>,boost::recursive_wrapper<binop<op_or>>>表达式;模板 结构体{显式 binop(const expr& l, const expr& r) : oper1(l), oper2(r) { }expr oper1, oper2;};模板 结构体{显式 unop(const expr& o) : oper1(o) { }expr oper1;};结构打印机:boost::static_visitor{打印机(std::ostream& os):_os(os){}std::ostream&_os;//void operator()(const var& v) const { _os <<v;}void operator()(const binop<op_and>& b) const { print(" & ", b.oper1, b.oper2);}void operator()(const binop<op_or >& b) const { print(" | ", b.oper1, b.oper2);}void operator()(const binop<op_xor>& b) const { print(" ^ ", b.oper1, b.oper2);}void print(const std::string& op, const expr& l, const expr& r) const{_os<<"(";boost::apply_visitor(*this, l);_os<<操作;boost::apply_visitor(*this, r);_os<<")";}void operator()(const unop<op_not>& u) const{_os<<"(";_os<<!";boost::apply_visitor(*this, u.oper1);_os<<")";}};std::ostream&运算符<<(std::ostream& os, const expr& e){ boost::apply_visitor(printer(os), e);返回操作系统;}template 结构解析器:qi::grammar{parser() : parser::base_type(expr_){使用命名空间qi;expr_ = or_.alias();not_ = ("not" > simple ) [ _val = phx::construct>(_1) ] |简单 [_val = _1];#ifdef RIGHT_ASSOCIATIVEor_ = (xor_ >> "or" >> or_ ) [ _val = phx::construct<binop<op_or >>(_1, _2)] |xor_ [ _val = _1 ];xor_ = (and_ >> "xor" >> xor_) [ _val = phx::construct<binop<op_xor>>(_1, _2)] |和_ [ _val = _1 ];and_ = (not_ >> "and" >> and_) [ _val = phx::construct<binop<op_and>>(_1, _2)] |不_ [ _val = _1 ];#别的or_ = xor_ [ _val = _1 ] >>*(或">>xor_[_val = phx::construct>(_val,_1)]);xor_ = and_ [ _val = _1 ] >>*("xor" >> and_ [ _val = phx::construct<binop<op_xor>>(_val, _1) ]);and_ = not_ [ _val = _1 ] >>*("and" >> not_ [ _val = phx::construct<binop<op_and>>(_val, _1) ]);#万一简单 = (('(' > expr_ > ')') | var_);var_ = qi::lexeme[ +alpha ];BOOST_SPIRIT_DEBUG_NODE(expr_);BOOST_SPIRIT_DEBUG_NODE(或_);BOOST_SPIRIT_DEBUG_NODE(xor_);BOOST_SPIRIT_DEBUG_NODE(and_);BOOST_SPIRIT_DEBUG_NODE(不是_);BOOST_SPIRIT_DEBUG_NODE(简单);BOOST_SPIRIT_DEBUG_NODE(var_);}私人的:qi::rule变量_;qi::rulenot_, and_, xor_, or_, simple, expr_;};int main(){for (auto& input : std::list<std::string> {//来自 OP:"(a and b) xor ((c and d) or (a and b));","a and b xor (c and d or a and b);",///更简单的测试:"a 和 b;","a 或 b;","a xor b;","不是;","不是 a 和 b;","不是 (a 和 b);","a 或 b 或 c;",}){自动 f(std::begin(input)), l(std::end(input));解析器<decltype(f)>p;尝试{expr 结果;bool ok = qi::phrase_parse(f,l,p > ';',qi::space,result);如果(!好的)std::cerr <<"无效输入
";别的std::cout <<结果:" <<结果<<"
";} catch (const qi::expectation_failure<decltype(f)>&e){std::cerr <<expectation_failure at '" <<std::string(e.first, e.last) <<"'
";}如果 (f!=l) std::cerr <<未解析:'"<<std::string(f,l) <<"'
";}返回0;}

奖励:

为了获得奖励积分,获得一棵与 OP 中所示一模一样的树:

生活在 Coliru

static const char indentstep[] = " ";struct tree_print : boost::static_visitor{tree_print(std::ostream& os, const std::string& indent=indentstep) : _os(os), _indent(indent) {}std::ostream&_os;std::string _indent;void operator()(const var& v) const { _os <<_缩进<

结果:

 一个和乙或者C和d异或一个和乙

I want to parse a boolean expression (in C++). Input form:

a and b xor (c and d or a and b);

I just want to parse this expression into a tree, knowing the precedence rule (not,and,xor,or). So the above expression should look something like:

(a and b) xor ((c and d) or (a and b));

to the parser.

And the tree would be of form:

                        a
                   and
                        b
               or
                        c
                   and
                        d
        xor
                   a
              and
                   b

The input will be either through command line or in the form of a string. I just need the parser.

Are there any sources that can help me do this?

解决方案

Here is an implementation based on Boost Spirit.

Because Boost Spirit generates recursive descent parsers based on expression templates, honouring the 'idiosyncratic' (sic) precedence rules (as mentioned by others) is quite tedious. Therefore the grammar lacks a certain elegance.

Abstract Data Type

I defined a tree data structure using Boost Variant's recursive variant support, note the definition of expr:

struct op_or  {}; // tag
struct op_and {}; // tag
struct op_xor {}; // tag
struct op_not {}; // tag

typedef std::string var;
template <typename tag> struct binop;
template <typename tag> struct unop;

typedef boost::variant<var, 
        boost::recursive_wrapper<unop <op_not> >, 
        boost::recursive_wrapper<binop<op_and> >,
        boost::recursive_wrapper<binop<op_xor> >,
        boost::recursive_wrapper<binop<op_or> >
        > expr;

(full source below)

Grammar Rules

The following is the (slightly tedious) grammar definition, as mentioned.

Although I don't consider this grammar optimal, it is quite readable, and we have ourselves a statically compiled parser with strongly typed AST datatype in roughly 50 lines of code. Things could be considerably worse.

template <typename It, typename Skipper = qi::space_type>
    struct parser : qi::grammar<It, expr(), Skipper>
{
    parser() : parser::base_type(expr_)
    {
        using namespace qi;
        expr_  = or_.alias();

        not_ = ("not" > simple       ) [ _val = phx::construct<unop <op_not>>(_1)     ] | simple [ _val = _1 ];
#ifdef RIGHT_ASSOCIATIVE
        or_  = (xor_ >> "or"  >> or_ ) [ _val = phx::construct<binop<op_or >>(_1, _2) ] | xor_   [ _val = _1 ];
        xor_ = (and_ >> "xor" >> xor_) [ _val = phx::construct<binop<op_xor>>(_1, _2) ] | and_   [ _val = _1 ];
        and_ = (not_ >> "and" >> and_) [ _val = phx::construct<binop<op_and>>(_1, _2) ] | not_   [ _val = _1 ];
#else
        or_  = xor_ [ _val = _1 ] >> *("or"  >> xor_ [ _val = phx::construct<binop<op_or>> (_val, _1) ]);
        xor_ = and_ [ _val = _1 ] >> *("xor" >> and_ [ _val = phx::construct<binop<op_xor>>(_val, _1) ]);
        and_ = not_ [ _val = _1 ] >> *("and" >> not_ [ _val = phx::construct<binop<op_and>>(_val, _1) ]);
#endif

        simple = (('(' > expr_ > ')') | var_);
        var_ = qi::lexeme[ +alpha ];
    }

  private:
    qi::rule<It, var() , Skipper> var_;
    qi::rule<It, expr(), Skipper> not_, and_, xor_, or_, simple, expr_;
};

Note: I've left the old rule definitions that would result in right-associativity in the binary operators for reference, but left-associativity is the more natural, and therefore taken by default

Operating on the syntax tree

Obviously, you'd want to evaluate the expressions. For now, I decided to stop at just printing, so I don't have to do the lookup table for named variables :)

Traversing a recursive variant may look cryptic at first, but the boost::static_visitor<> is surprisingly simple once you get the hang of it:

struct printer : boost::static_visitor<void>
{
    printer(std::ostream& os) : _os(os) {}
    std::ostream& _os;

    //
    void operator()(const var& v) const { _os << v; }

    void operator()(const binop<op_and>& b) const { print(" & ", b.oper1, b.oper2); }
    void operator()(const binop<op_or >& b) const { print(" | ", b.oper1, b.oper2); }
    void operator()(const binop<op_xor>& b) const { print(" ^ ", b.oper1, b.oper2); }

    void print(const std::string& op, const expr& l, const expr& r) const
    {
        _os << "(";
            boost::apply_visitor(*this, l);
            _os << op;
            boost::apply_visitor(*this, r);
        _os << ")";
    }

    void operator()(const unop<op_not>& u) const
    {
        _os << "(";
            _os << "!";
            boost::apply_visitor(*this, u.oper1);
        _os << ")";
    }
};

std::ostream& operator<<(std::ostream& os, const expr& e)
{ boost::apply_visitor(printer(os), e); return os; }

Test output:

For the test cases in the code, the following is output, demonstrating correct handling of the precedence rules by adding (redundant) parentheses:

Live On Coliru

result: ((a & b) ^ ((c & d) | (a & b)))
result: ((a & b) ^ ((c & d) | (a & b)))
result: (a & b)
result: (a | b)
result: (a ^ b)
result: (!a)
result: ((!a) & b)
result: (!(a & b))
result: ((a | b) | c)

Note, compare to Live On Coliru, with -DRIGHT_ASSOCIATIVE

Full Code:

Live On Coliru

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/variant/recursive_wrapper.hpp>

namespace qi    = boost::spirit::qi;
namespace phx   = boost::phoenix;

struct op_or  {};
struct op_and {};
struct op_xor {};
struct op_not {};

typedef std::string var;
template <typename tag> struct binop;
template <typename tag> struct unop;

typedef boost::variant<var, 
        boost::recursive_wrapper<unop <op_not> >, 
        boost::recursive_wrapper<binop<op_and> >,
        boost::recursive_wrapper<binop<op_xor> >,
        boost::recursive_wrapper<binop<op_or> >
        > expr;

template <typename tag> struct binop 
{ 
    explicit binop(const expr& l, const expr& r) : oper1(l), oper2(r) { }
    expr oper1, oper2; 
};

template <typename tag> struct unop  
{ 
    explicit unop(const expr& o) : oper1(o) { }
    expr oper1; 
};

struct printer : boost::static_visitor<void>
{
    printer(std::ostream& os) : _os(os) {}
    std::ostream& _os;

    //
    void operator()(const var& v) const { _os << v; }

    void operator()(const binop<op_and>& b) const { print(" & ", b.oper1, b.oper2); }
    void operator()(const binop<op_or >& b) const { print(" | ", b.oper1, b.oper2); }
    void operator()(const binop<op_xor>& b) const { print(" ^ ", b.oper1, b.oper2); }

    void print(const std::string& op, const expr& l, const expr& r) const
    {
        _os << "(";
            boost::apply_visitor(*this, l);
            _os << op;
            boost::apply_visitor(*this, r);
        _os << ")";
    }

    void operator()(const unop<op_not>& u) const
    {
        _os << "(";
            _os << "!";
            boost::apply_visitor(*this, u.oper1);
        _os << ")";
    }
};

std::ostream& operator<<(std::ostream& os, const expr& e)
{ boost::apply_visitor(printer(os), e); return os; }

template <typename It, typename Skipper = qi::space_type>
    struct parser : qi::grammar<It, expr(), Skipper>
{
    parser() : parser::base_type(expr_)
    {
        using namespace qi;

        expr_  = or_.alias();

        not_ = ("not" > simple       ) [ _val = phx::construct<unop <op_not>>(_1)     ] | simple [ _val = _1 ];
#ifdef RIGHT_ASSOCIATIVE
        or_  = (xor_ >> "or"  >> or_ ) [ _val = phx::construct<binop<op_or >>(_1, _2) ] | xor_   [ _val = _1 ];
        xor_ = (and_ >> "xor" >> xor_) [ _val = phx::construct<binop<op_xor>>(_1, _2) ] | and_   [ _val = _1 ];
        and_ = (not_ >> "and" >> and_) [ _val = phx::construct<binop<op_and>>(_1, _2) ] | not_   [ _val = _1 ];
#else
        or_  = xor_ [ _val = _1 ] >> *("or"  >> xor_ [ _val = phx::construct<binop<op_or>> (_val, _1) ]);
        xor_ = and_ [ _val = _1 ] >> *("xor" >> and_ [ _val = phx::construct<binop<op_xor>>(_val, _1) ]);
        and_ = not_ [ _val = _1 ] >> *("and" >> not_ [ _val = phx::construct<binop<op_and>>(_val, _1) ]);
#endif

        simple = (('(' > expr_ > ')') | var_);
        var_ = qi::lexeme[ +alpha ];

        BOOST_SPIRIT_DEBUG_NODE(expr_);
        BOOST_SPIRIT_DEBUG_NODE(or_);
        BOOST_SPIRIT_DEBUG_NODE(xor_);
        BOOST_SPIRIT_DEBUG_NODE(and_);
        BOOST_SPIRIT_DEBUG_NODE(not_);
        BOOST_SPIRIT_DEBUG_NODE(simple);
        BOOST_SPIRIT_DEBUG_NODE(var_);
    }

  private:
    qi::rule<It, var() , Skipper> var_;
    qi::rule<It, expr(), Skipper> not_, and_, xor_, or_, simple, expr_;
};

int main()
{
    for (auto& input : std::list<std::string> {
            // From the OP:
            "(a and b) xor ((c and d) or (a and b));",
            "a and b xor (c and d or a and b);",

            /// Simpler tests:
            "a and b;",
            "a or b;",
            "a xor b;",
            "not a;",
            "not a and b;",
            "not (a and b);",
            "a or b or c;",
            })
    {
        auto f(std::begin(input)), l(std::end(input));
        parser<decltype(f)> p;

        try
        {
            expr result;
            bool ok = qi::phrase_parse(f,l,p > ';',qi::space,result);

            if (!ok)
                std::cerr << "invalid input
";
            else
                std::cout << "result: " << result << "
";

        } catch (const qi::expectation_failure<decltype(f)>& e)
        {
            std::cerr << "expectation_failure at '" << std::string(e.first, e.last) << "'
";
        }

        if (f!=l) std::cerr << "unparsed: '" << std::string(f,l) << "'
";
    }

    return 0;
}

Bonus:

For bonus points, to get a tree exactly like shown in the OP:

Live On Coliru

static const char indentstep[] = "    ";

struct tree_print : boost::static_visitor<void>
{
    tree_print(std::ostream& os, const std::string& indent=indentstep) : _os(os), _indent(indent) {}
    std::ostream& _os;
    std::string _indent;

    void operator()(const var& v) const { _os << _indent << v << std::endl; }

    void operator()(const binop<op_and>& b) const { print("and ", b.oper1, b.oper2); }
    void operator()(const binop<op_or >& b) const { print("or  ", b.oper2, b.oper1); }
    void operator()(const binop<op_xor>& b) const { print("xor ", b.oper2, b.oper1); }

    void print(const std::string& op, const expr& l, const expr& r) const
    {
        boost::apply_visitor(tree_print(_os, _indent+indentstep), l);
        _os << _indent << op << std::endl;
        boost::apply_visitor(tree_print(_os, _indent+indentstep), r);
    }

    void operator()(const unop<op_not>& u) const
    {
        _os << _indent << "!";
        boost::apply_visitor(tree_print(_os, _indent+indentstep), u.oper1);
    }
};

std::ostream& operator<<(std::ostream& os, const expr& e)
{ 
    boost::apply_visitor(tree_print(os), e); return os; 
}

result:

            a
        and 
            b
    or  
            c
        and 
            d
xor 
        a
    and 
        b

相关文章