summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEmilio Cobos Álvarez <ecoal95@gmail.com>2016-11-06 14:36:47 +0100
committerEmilio Cobos Álvarez <ecoal95@gmail.com>2016-11-08 20:55:42 +0100
commite22a11b4d8d37ff1ed850fd596a0110b433907ee (patch)
tree36a8d15a33bcfcfecafc8daa835c062403c2a3ec
parent7fe40e0cf92df36219308406dcb4130a848fb6f6 (diff)
Parse macros with cexpr.
-rw-r--r--Cargo.toml1
-rwxr-xr-xsrc/clang.rs69
-rw-r--r--src/ir/context.rs25
-rw-r--r--src/ir/var.rs130
-rwxr-xr-xsrc/lib.rs1
-rw-r--r--tests/expectations/tests/jsval_layout_opaque.rs3
-rw-r--r--tests/expectations/tests/macro-expr-basic.rs14
-rw-r--r--tests/headers/macro-expr-basic.h12
8 files changed, 182 insertions, 73 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 4e455fa9..97bcb82f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -31,6 +31,7 @@ env_logger = "0.3"
rustc-serialize = "0.3.19"
syntex_syntax = "0.44"
regex = "0.1"
+cexpr = "0.2"
[dependencies.aster]
features = ["with-syntex"]
diff --git a/src/clang.rs b/src/clang.rs
index e6d78123..177d7dab 100755
--- a/src/clang.rs
+++ b/src/clang.rs
@@ -4,8 +4,9 @@
#![allow(non_upper_case_globals, dead_code)]
+use cexpr;
use clangll::*;
-use std::{mem, ptr};
+use std::{mem, ptr, slice};
use std::ffi::{CStr, CString};
use std::fmt;
use std::hash::Hash;
@@ -1051,18 +1052,18 @@ impl TranslationUnit {
let range = cursor.extent();
let mut tokens = vec![];
unsafe {
- let mut token_ptr = ::std::ptr::null_mut();
+ let mut token_ptr = ptr::null_mut();
let mut num_tokens: c_uint = 0;
clang_tokenize(self.x, range, &mut token_ptr, &mut num_tokens);
if token_ptr.is_null() {
return None;
}
- let token_array = ::std::slice::from_raw_parts(token_ptr,
- num_tokens as usize);
+
+ let token_array = slice::from_raw_parts(token_ptr,
+ num_tokens as usize);
for &token in token_array.iter() {
let kind = clang_getTokenKind(token);
- let spelling: String = clang_getTokenSpelling(self.x, token)
- .into();
+ let spelling = clang_getTokenSpelling(self.x, token).into();
tokens.push(Token {
kind: kind,
@@ -1073,6 +1074,62 @@ impl TranslationUnit {
}
Some(tokens)
}
+
+ /// Convert a set of tokens from clang into `cexpr` tokens, for further
+ /// processing.
+ pub fn cexpr_tokens(&self,
+ cursor: &Cursor)
+ -> Option<Vec<cexpr::token::Token>> {
+ use cexpr::token;
+
+ let mut tokens = match self.tokens(cursor) {
+ Some(tokens) => tokens,
+ None => return None,
+ };
+
+ // FIXME(emilio): LLVM 3.9 at least always includes an extra token for no
+ // good reason (except if we're at EOF). So we do this kind of hack,
+ // where we skip known-to-cause problems trailing punctuation and
+ // trailing keywords.
+ //
+ // This is sort of unfortunate, though :(.
+ //
+ // I'll try to get it fixed in LLVM if I have the time to submit a
+ // patch.
+ let mut trim_last_token = false;
+ if let Some(token) = tokens.last() {
+ // The start of the next macro.
+ trim_last_token |= token.spelling == "#" &&
+ token.kind == CXToken_Punctuation;
+
+ // A following keyword of any kind, like a following declaration.
+ trim_last_token |= token.kind == CXToken_Keyword;
+ }
+
+ if trim_last_token {
+ tokens.pop().unwrap();
+ }
+
+ Some(tokens.into_iter()
+ .filter_map(|token| {
+ let kind = match token.kind {
+ CXToken_Punctuation => token::Kind::Punctuation,
+ CXToken_Literal => token::Kind::Literal,
+ CXToken_Identifier => token::Kind::Identifier,
+ CXToken_Keyword => token::Kind::Keyword,
+ // NB: cexpr is not too happy about comments inside
+ // expressions, so we strip them down here.
+ CXToken_Comment => return None,
+ _ => panic!("Found unexpected token kind: {}", token.kind),
+ };
+
+ Some(token::Token {
+ kind: kind,
+ raw: token.spelling.into_bytes().into_boxed_slice(),
+ })
+ })
+ .collect::<Vec<_>>())
+ }
}
impl Drop for TranslationUnit {
diff --git a/src/ir/context.rs b/src/ir/context.rs
index f11b387a..977db9c5 100644
--- a/src/ir/context.rs
+++ b/src/ir/context.rs
@@ -1,10 +1,11 @@
//! Common context that is passed around during parsing and codegen.
use BindgenOptions;
+use cexpr;
use clang::{self, Cursor};
use parse::ClangItemParser;
use std::borrow::{Borrow, Cow};
-use std::collections::{HashMap, HashSet, hash_map};
+use std::collections::{HashMap, hash_map};
use std::collections::btree_map::{self, BTreeMap};
use std::fmt;
use super::int::IntKind;
@@ -77,8 +78,9 @@ pub struct BindgenContext<'ctx> {
pub currently_parsed_types: Vec<(Cursor, ItemId)>,
/// A HashSet with all the already parsed macro names. This is done to avoid
- /// hard errors while parsing duplicated macros.
- parsed_macros: HashSet<String>,
+ /// hard errors while parsing duplicated macros, as well as to allow macro
+ /// expression parsing.
+ parsed_macros: HashMap<Vec<u8>, cexpr::expr::EvalResult>,
/// The active replacements collected from replaces="xxx" annotations.
replacements: HashMap<String, ItemId>,
@@ -715,14 +717,21 @@ impl<'ctx> BindgenContext<'ctx> {
}
/// Have we parsed the macro named `macro_name` already?
- pub fn parsed_macro(&self, macro_name: &str) -> bool {
- self.parsed_macros.contains(macro_name)
+ pub fn parsed_macro(&self, macro_name: &[u8]) -> bool {
+ self.parsed_macros.contains_key(macro_name)
+ }
+
+ /// Get the currently parsed macros.
+ pub fn parsed_macros(&self) -> &HashMap<Vec<u8>, cexpr::expr::EvalResult> {
+ debug_assert!(!self.in_codegen_phase());
+ &self.parsed_macros
}
/// Mark the macro named `macro_name` as parsed.
- pub fn note_parsed_macro(&mut self, macro_name: String) {
- debug_assert!(!self.parsed_macros.contains(&macro_name));
- self.parsed_macros.insert(macro_name);
+ pub fn note_parsed_macro(&mut self,
+ id: Vec<u8>,
+ value: cexpr::expr::EvalResult) {
+ self.parsed_macros.insert(id, value);
}
/// Are we in the codegen phase?
diff --git a/src/ir/var.rs b/src/ir/var.rs
index 33e56242..62f17030 100644
--- a/src/ir/var.rs
+++ b/src/ir/var.rs
@@ -1,7 +1,9 @@
//! Intermediate representation of variables.
+use cexpr;
use clang;
use parse::{ClangItemParser, ClangSubItemParser, ParseError, ParseResult};
+use std::num::Wrapping;
use super::context::BindgenContext;
use super::function::cursor_mangling;
use super::int::IntKind;
@@ -73,43 +75,61 @@ impl ClangSubItemParser for Var {
ctx: &mut BindgenContext)
-> Result<ParseResult<Self>, ParseError> {
use clangll::*;
+ use cexpr::expr::EvalResult;
match cursor.kind() {
CXCursor_MacroDefinition => {
- let value = parse_int_literal_tokens(&cursor,
- ctx.translation_unit());
+ let value = parse_macro(ctx, &cursor, ctx.translation_unit());
- let value = match value {
+ let (id, value) = match value {
Some(v) => v,
None => return Err(ParseError::Continue),
};
- let name = cursor.spelling();
- if name.is_empty() {
- warn!("Empty macro name?");
- return Err(ParseError::Continue);
- }
+ assert!(!id.is_empty(), "Empty macro name?");
- if ctx.parsed_macro(&name) {
+ if ctx.parsed_macro(&id) {
+ let name = String::from_utf8(id).unwrap();
warn!("Duplicated macro definition: {}", name);
return Err(ParseError::Continue);
}
- ctx.note_parsed_macro(name.clone());
-
- let ty = if value < 0 {
- Item::builtin_type(TypeKind::Int(IntKind::Int), true, ctx)
- } else if value.abs() > u32::max_value() as i64 {
- Item::builtin_type(TypeKind::Int(IntKind::ULongLong),
- true,
- ctx)
- } else {
- Item::builtin_type(TypeKind::Int(IntKind::UInt), true, ctx)
+
+ // NB: It's important to "note" the macro even if the result is
+ // not an integer, otherwise we might lose other kinds of
+ // derived macros.
+ ctx.note_parsed_macro(id.clone(), value.clone());
+
+ // NOTE: Unwrapping, here and above, is safe, because the
+ // identifier of a token comes straight from clang, and we
+ // enforce utf8 there, so we should have already panicked at
+ // this point.
+ let name = String::from_utf8(id).unwrap();
+ let (int_kind, val) = match value {
+ // TODO(emilio): Handle the non-invalid ones!
+ EvalResult::Float(..) |
+ EvalResult::Char(..) |
+ EvalResult::Str(..) |
+ EvalResult::Invalid => return Err(ParseError::Continue),
+
+ EvalResult::Int(Wrapping(value)) => {
+ let kind = if value < 0 {
+ if value < i32::min_value() as i64 {
+ IntKind::LongLong
+ } else {
+ IntKind::Int
+ }
+ } else if value > u32::max_value() as i64 {
+ IntKind::ULongLong
+ } else {
+ IntKind::UInt
+ };
+
+ (kind, value)
+ }
};
- Ok(ParseResult::New(Var::new(name,
- None,
- ty,
- Some(value),
- true),
+ let ty = Item::builtin_type(TypeKind::Int(int_kind), true, ctx);
+
+ Ok(ParseResult::New(Var::new(name, None, ty, Some(val), true),
Some(cursor)))
}
CXCursor_VarDecl => {
@@ -153,49 +173,43 @@ impl ClangSubItemParser for Var {
}
}
-/// Try and parse the immediately found tokens from an unit (if any) to integers
+/// Try and parse a macro using all the macros parsed until now.
+fn parse_macro(ctx: &BindgenContext,
+ cursor: &clang::Cursor,
+ unit: &clang::TranslationUnit)
+ -> Option<(Vec<u8>, cexpr::expr::EvalResult)> {
+ use cexpr::{expr, nom};
+
+ let cexpr_tokens = match unit.cexpr_tokens(cursor) {
+ None => return None,
+ Some(tokens) => tokens,
+ };
+
+ let parser = expr::IdentifierParser::new(ctx.parsed_macros());
+ let result = parser.macro_definition(&cexpr_tokens);
+
+ match result {
+ nom::IResult::Done(_, (id, val)) => Some((id.into(), val)),
+ _ => None,
+ }
+}
+
fn parse_int_literal_tokens(cursor: &clang::Cursor,
unit: &clang::TranslationUnit)
-> Option<i64> {
- use clangll::{CXToken_Literal, CXToken_Punctuation};
+ use cexpr::{expr, nom};
+ use cexpr::expr::EvalResult;
- let tokens = match unit.tokens(cursor) {
+ let cexpr_tokens = match unit.cexpr_tokens(cursor) {
None => return None,
Some(tokens) => tokens,
};
- let mut literal = None;
- let mut negate = false;
- for token in tokens.into_iter() {
- match token.kind {
- CXToken_Punctuation if token.spelling == "-" => {
- negate = !negate;
- }
- CXToken_Literal => {
- literal = Some(token.spelling);
- break;
- }
- _ => {
- // Reset values if we found anything else
- negate = false;
- literal = None;
- }
- }
+ // TODO(emilio): We can try to parse other kinds of literals.
+ match expr::expr(&cexpr_tokens) {
+ nom::IResult::Done(_, EvalResult::Int(Wrapping(val))) => Some(val),
+ _ => None,
}
-
- literal.and_then(|lit| {
- if lit.starts_with("0x") {
- // TODO: try to preserve hex literals?
- i64::from_str_radix(&lit[2..], 16).ok()
- } else if lit == "0" {
- Some(0)
- } else if lit.starts_with("0") {
- i64::from_str_radix(&lit[1..], 8).ok()
- } else {
- lit.parse().ok()
- }
- })
- .map(|lit| if negate { -lit } else { lit })
}
fn get_integer_literal_from_cursor(cursor: &clang::Cursor,
diff --git a/src/lib.rs b/src/lib.rs
index 03dac3cc..a632c461 100755
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -24,6 +24,7 @@
#[macro_use]
extern crate cfg_if;
+extern crate cexpr;
extern crate syntex_syntax as syntax;
extern crate aster;
extern crate quasi;
diff --git a/tests/expectations/tests/jsval_layout_opaque.rs b/tests/expectations/tests/jsval_layout_opaque.rs
index f3c1014e..fa611f20 100644
--- a/tests/expectations/tests/jsval_layout_opaque.rs
+++ b/tests/expectations/tests/jsval_layout_opaque.rs
@@ -24,8 +24,9 @@ impl <T> ::std::clone::Clone for __BindgenUnionField<T> {
fn clone(&self) -> Self { Self::new() }
}
impl <T> ::std::marker::Copy for __BindgenUnionField<T> { }
-pub const JSVAL_ALIGNMENT: ::std::os::raw::c_uint = 8;
pub const JSVAL_TAG_SHIFT: ::std::os::raw::c_uint = 47;
+pub const JSVAL_PAYLOAD_MASK: ::std::os::raw::c_ulonglong = 140737488355327;
+pub const JSVAL_TAG_MASK: ::std::os::raw::c_longlong = -140737488355328;
#[repr(u8)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum JSValueType {
diff --git a/tests/expectations/tests/macro-expr-basic.rs b/tests/expectations/tests/macro-expr-basic.rs
new file mode 100644
index 00000000..7a5c71e0
--- /dev/null
+++ b/tests/expectations/tests/macro-expr-basic.rs
@@ -0,0 +1,14 @@
+/* automatically generated by rust-bindgen */
+
+
+#![allow(non_snake_case)]
+
+
+pub const FOO: ::std::os::raw::c_uint = 1;
+pub const BAR: ::std::os::raw::c_uint = 4;
+pub const BAZ: ::std::os::raw::c_uint = 5;
+pub const BARR: ::std::os::raw::c_uint = 1;
+pub const BAZZ: ::std::os::raw::c_uint = 7;
+pub const I_RAN_OUT_OF_DUMB_NAMES: ::std::os::raw::c_uint = 7;
+pub const HAZ_A_COMMENT: ::std::os::raw::c_uint = 1;
+pub const HAZ_A_COMMENT_INSIDE: ::std::os::raw::c_uint = 2;
diff --git a/tests/headers/macro-expr-basic.h b/tests/headers/macro-expr-basic.h
new file mode 100644
index 00000000..55b11367
--- /dev/null
+++ b/tests/headers/macro-expr-basic.h
@@ -0,0 +1,12 @@
+#define FOO 1
+#define BAR 4
+#define BAZ (FOO + BAR)
+
+#define BARR (1 << 0)
+#define BAZZ ((1 << 1) + BAZ)
+#define I_RAN_OUT_OF_DUMB_NAMES (BARR | BAZZ)
+
+/* I haz a comment */
+#define HAZ_A_COMMENT BARR
+
+#define HAZ_A_COMMENT_INSIDE (/* comment for real */ BARR + FOO)