003
13.03.2008, 22:52 Uhr
0xdeadbeef
Gott (Operator)
|
Eh, ich hab aus Spaß mal etwas in der Art zusammengehackt. Es handelt sich hierbei um eine einfache LL(2)-Grammatik, die aus einem String parst (Der Scanner sollte aber recht einfach auf eine Datei umschreibbar sein). An einigen Stellen hab ich aus Performance- und Speichersparsamkeitsgründen weniger saubere Techniken benutzt (deswegen sieht sie Speicherverwaltung so widerlich aus), aber als Anschauungsbeispiel sollte das eigentlich ganz gut hinhauen. Du wirst feststellen, dass der Code in der parser.cc einer EBNF-Grammatik recht ähnlich sieht, das ist kein Zufall. Also:
C++: |
// keytree.hh
#ifndef INCLUDED_KEYTREE_HH #define INCLUDED_KEYTREE_HH
#include <string> #include <vector>
class parser;
class keytree; class kt_value;
typedef std::vector<kt_value> kt_valuelist;
class kt_value { public: enum type_id { keytree_id, valuelist_id, string_id, int_id };
std::string const &str () const { return *data_.str; } keytree const &kt () const { return *data_.kt ; } int x () const { return data_.x ; } kt_valuelist const &vl () const { return *data_.vl ; }
type_id type() const { return type_ ; }
void destroy();
private: union { keytree *kt; int x; std::string *str; kt_valuelist *vl; } data_;
type_id type_;
friend class parser; };
class keytree { public: std::string const &key () const { return key_; } kt_value const &value() const { return val_; }
~keytree();
private: std::string key_; kt_value val_;
friend class parser; };
#endif
|
C++: |
// keytree.cc
#include "keytree.hh"
void kt_value::destroy() { switch(type_) { case keytree_id: delete data_.kt; break; case string_id: delete data_.str; break; case valuelist_id: for(kt_valuelist::iterator i = data_.vl->begin(); i != data_.vl->end(); ++i) { i->destroy(); } delete data_.vl; break; default: ; } }
keytree::~keytree() { val_.destroy(); }
|
C++: |
// scanner.hh
#ifndef INCLUDED_SCANNER_HH #define INCLUDED_SCANNER_HH
#include <string>
class scanner { public: scanner(std::string const &data);
char peek(unsigned lookahead = 0) const; void advance();
private: std::string data_; std::string::size_type pos_; };
#endif
|
C++: |
// scanner.cc
#include "scanner.hh"
#include <algorithm> #include <string> #include <stdexcept>
scanner::scanner(std::string const &data) : pos_(0) { // Sanitize data
bool in_string = false;
data_.reserve(data.size() + 1);
for(std::string::const_iterator i = data.begin(); i != data.end(); ++i) { if(*i == '\"') { in_string = !in_string; }
if(in_string || *i != ' ') { data_.push_back(*i); } } }
char scanner::peek(unsigned lookahead) const { if(pos_ + lookahead >= data_.size()) { throw std::runtime_error("Unexpected end of input."); } return data_[pos_ + lookahead]; }
void scanner::advance() { peek(); ++pos_; }
|
C++: |
// parser.hh
#ifndef INCLUDED_PARSER_HH #define INCLUDED_PARSER_HH
#include "scanner.hh" #include "keytree.hh"
#include <string> #include <vector>
class parser { public: parser(scanner const &scan);
keytree *parse();
private: bool accept(char c); bool accept(bool (*cond)(char)); void expect(char c); void expect(bool (*cond)(char));
kt_value parse_value (); keytree *parse_keytree (); std::string parse_string (); int parse_number (); kt_valuelist *parse_valuelist();
scanner scan_; };
#endif
|
C++: |
// parser.cc
#include "parser.hh" #include "keytree.hh" #include "scanner.hh"
#include <stdexcept>
namespace { bool is_number(char c) { return c >= '0' && c <= '9'; }
bool is_not_number(char c) { return !is_number(c); } }
parser::parser(scanner const &scan) : scan_(scan) { }
keytree *parser::parse() { return parse_keytree(); }
bool parser::accept(char c) { if(c == scan_.peek()) { scan_.advance(); return true; } return false; }
bool parser::accept(bool (*cond)(char)) { if(cond(scan_.peek())) { scan_.advance(); return true; }
return false; }
void parser::expect(char c) { if(!accept(c)) { throw std::runtime_error(std::string("unexpected token: \"") + scan_.peek() + std::string("\"")); } }
void parser::expect(bool (*cond)(char)) { if(!accept(cond)) { throw std::runtime_error(std::string("unexpected token: \"") + scan_.peek() + std::string("\"")); } }
keytree *parser::parse_keytree() { keytree *result; std::string key; kt_value val;
expect('('); expect('.');
key = parse_string();
expect(',');
val = parse_value();
expect(')');
result = new keytree;
result->key_ = key; result->val_ = val;
return result; }
kt_value parser::parse_value() { kt_value v;
if(scan_.peek() == '(') { if(scan_.peek(1) == '.') { v.type_ = kt_value::keytree_id; v.data_.kt = parse_keytree(); } else { v.type_ = kt_value::valuelist_id; v.data_.vl = parse_valuelist(); } } else if(scan_.peek() == '"') { v.type_ = kt_value::string_id; v.data_.str = new std::string(parse_string()); } else if(scan_.peek() == '+' || scan_.peek() == '-' || ::is_number(scan_.peek())) { v.type_ = kt_value::int_id; v.data_.x = parse_number(); }
return v; }
std::string parser::parse_string() { std::string str;
expect('"');
while(!accept('"')) { str += scan_.peek(); scan_.advance(); }
return str; }
int parser::parse_number() { int x, fac = 1;
if(accept('-')) { fac = -1; } else { accept('+'); }
x = scan_.peek() - '0';
expect(::is_number);
while(is_number(scan_.peek())) { x *= 10; x += scan_.peek() - '0'; scan_.advance(); }
return x; }
kt_valuelist *parser::parse_valuelist() { kt_valuelist tmp, *result;
expect('('); do { tmp.push_back(parse_value()); } while(accept(',')); expect(')');
result = new kt_valuelist; result->swap(tmp);
return result; }
|
C++: |
// main.cc
#include "parser.hh" #include "scanner.hh" #include "keytree.hh"
#include <iostream>
void traverse_tree(keytree const &kt, unsigned depth = 0);
void print_value(kt_value const &val, unsigned depth) { std::string offset(depth * 2, ' ');
switch(val.type()) { case kt_value::int_id: std::cout << offset << " " << val.x() << std::endl; break; case kt_value::string_id: std::cout << offset << " " << val.str() << std::endl; break; case kt_value::keytree_id: traverse_tree(val.kt(), depth + 1); break; case kt_value::valuelist_id: for(kt_valuelist::const_iterator i = val.vl().begin(); i != val.vl().end(); ++i) { print_value(*i, depth); } break; default: std::cout << offset << " Unexpected." << std::endl; } }
void traverse_tree(keytree const &kt, unsigned depth) { std::string offset(depth * 2, ' ');
std::cout << offset << kt.key() << std::endl; print_value(kt.value(), depth); }
int main() { parser p(scanner("(.\"foo bar\",(\"bar\", 2 ,(.\"baz\",123),4))")); keytree *kt = p.parse();
traverse_tree(*kt);
delete kt; }
|
@FloSoft: Meinste, das wär was für einen FAQ-Eintrag? Fragen nach Parsern werden zwar nicht oft gestellt, aber die Source Corner ist leider unten... -- Einfachheit ist Voraussetzung für Zuverlässigkeit. -- Edsger Wybe Dijkstra Dieser Post wurde am 13.03.2008 um 22:54 Uhr von 0xdeadbeef editiert. |