summaryrefslogtreecommitdiff
path: root/src/ir/context.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/ir/context.rs')
-rw-r--r--src/ir/context.rs685
1 files changed, 685 insertions, 0 deletions
diff --git a/src/ir/context.rs b/src/ir/context.rs
new file mode 100644
index 00000000..5ddedda0
--- /dev/null
+++ b/src/ir/context.rs
@@ -0,0 +1,685 @@
+use super::ty::{Type, TypeKind, FloatKind};
+use super::item::{Item, ItemCanonicalName, ItemId};
+use super::item_kind::ItemKind;
+use super::int::IntKind;
+use super::module::Module;
+use clang::{self, Cursor};
+use std::borrow::{Cow, Borrow};
+use std::collections::btree_map::{self, BTreeMap};
+use std::collections::{HashSet, HashMap};
+use std::fmt;
+use syntax::ast::Ident;
+use syntax::codemap::{DUMMY_SP, Span};
+use syntax::ext::base::ExtCtxt;
+use parse::ClangItemParser;
+use BindgenOptions;
+
+/// Thin wrapper around the syntax extension context.
+///
+/// It only exists for convenience: giving the wrapper an opaque `Debug` impl
+/// lets `BindgenContext` derive `Debug` instead of needing a manual impl.
+struct GenContext<'ctx>(ExtCtxt<'ctx>);
+
+impl<'ctx> fmt::Debug for GenContext<'ctx> {
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        // The wrapped context is deliberately not printed.
+        formatter.write_str("GenContext { ... }")
+    }
+}
+
+/// A context used during parsing and generation of structs.
+#[derive(Debug)]
+pub struct BindgenContext<'ctx> {
+ /// The map of all the items parsed so far.
+ ///
+ /// It's a BTreeMap because we want the keys to be sorted to have consistent
+ /// output.
+ items: BTreeMap<ItemId, Item>,
+
+ /// Clang cursor to type map. This is needed to be able to associate types
+ /// with item ids during parsing.
+ ///
+ /// The cursor used for storage is the definition cursor.
+ types: HashMap<Cursor, ItemId>,
+
+ /// A cursor to module map, kept for the same reason as `types`: it
+ /// associates namespace cursors with the item ids of their modules.
+ modules: HashMap<Cursor, ItemId>,
+
+ /// The root module, this is guaranteed to be an item of kind Module.
+ root_module: ItemId,
+
+ /// Current module being traversed.
+ current_module: ItemId,
+
+ /// A stack with the current type declarations and types we're parsing. This
+ /// is needed to avoid infinite recursion when parsing a type like:
+ ///
+ /// struct c { struct c* next; };
+ ///
+ /// This means effectively, that a type has a potential ID before knowing if
+ /// it's a correct type. But that's not important in practice.
+ ///
+ /// We could also use the `types` HashMap, but my intention with it is that
+ /// only valid types and declarations end up there, and this could
+ /// potentially break that assumption.
+ ///
+ /// FIXME: Should not be public, though... meh.
+ pub currently_parsed_types: Vec<(Cursor, ItemId)>,
+
+ /// A HashSet with all the already parsed macro names. This is done to avoid
+ /// hard errors while parsing duplicated macros.
+ parsed_macros: HashSet<String>,
+
+ /// The active replacements collected from replaces="xxx" annotations.
+ replacements: HashMap<String, ItemId>,
+
+ /// Whether the unresolved type references have already been collected.
+ /// Starts out `false` and is set by `collect_typerefs`.
+ collected_typerefs: bool,
+
+ /// Dummy structures for code generation.
+ ///
+ /// `gen_ctx` is `None` outside of `gen`, which sets it for the duration of
+ /// the generation callback.
+ gen_ctx: Option<&'ctx GenContext<'ctx>>,
+ /// The span returned by `span()`; initialised to `DUMMY_SP`.
+ span: Span,
+
+ /// The clang index for parsing.
+ index: clang::Index,
+
+ /// The translation unit for parsing.
+ translation_unit: clang::TranslationUnit,
+
+ /// The options given by the user via cli or other medium.
+ options: BindgenOptions,
+}
+
+impl<'ctx> BindgenContext<'ctx> {
+ /// Builds a fresh context from the user supplied options.
+ ///
+ /// The translation unit is parsed with `options.clang_args`, and the root
+ /// module is registered as the first item; it is also the initial current
+ /// module.
+ pub fn new(options: BindgenOptions) -> Self {
+     use clangll;
+
+     let index = clang::Index::new(false, true);
+     let translation_unit = clang::TranslationUnit::parse(
+         &index,
+         "",
+         &options.clang_args,
+         &[],
+         clangll::CXTranslationUnit_DetailedPreprocessingRecord);
+
+     let root = Self::build_root_module();
+     let root_id = root.id();
+
+     let mut context = BindgenContext {
+         items: BTreeMap::new(),
+         types: HashMap::new(),
+         modules: HashMap::new(),
+         root_module: root_id,
+         current_module: root_id,
+         currently_parsed_types: Vec::new(),
+         parsed_macros: HashSet::new(),
+         replacements: HashMap::new(),
+         collected_typerefs: false,
+         gen_ctx: None,
+         span: DUMMY_SP,
+         index: index,
+         translation_unit: translation_unit,
+         options: options,
+     };
+
+     // The root module is not a type, so it needs no declaration cursor.
+     context.add_item(root, None, None);
+     context
+ }
+
+ /// Registers `item` in the context.
+ ///
+ /// Every item is stored in the item map under its id. Type items that come
+ /// with a declaration cursor are additionally indexed in the cursor -> type
+ /// map: under `declaration` when that cursor is valid, otherwise under the
+ /// canonical cursor of `location` when `location` is a class template or a
+ /// partial specialization of one.
+ ///
+ /// # Panics
+ ///
+ /// Panics if an item with the same id has already been registered.
+ pub fn add_item(&mut self,
+                 item: Item,
+                 declaration: Option<Cursor>,
+                 location: Option<Cursor>) {
+     use clangll::{CXCursor_ClassTemplate, CXCursor_ClassTemplatePartialSpecialization};
+     debug!("BindgenContext::add_item({:?}, declaration: {:?}, loc: {:?})", item, declaration, location);
+     debug_assert!(declaration.is_some() || !item.kind().is_type() ||
+                   item.kind().expect_type().is_builtin_or_named(),
+                   "Adding a type without declaration?");
+
+     let id = item.id();
+     let is_type = item.kind().is_type();
+     let old_item = self.items.insert(id, item);
+     assert!(old_item.is_none(), "Inserted type twice?");
+
+     // Only types that come with a declaration are indexed by cursor.
+     if !is_type {
+         return;
+     }
+     let declaration = match declaration {
+         Some(declaration) => declaration,
+         None => return,
+     };
+     debug_assert_eq!(declaration, declaration.canonical());
+
+     if declaration.is_valid() {
+         let old = self.types.insert(declaration, id);
+         debug_assert_eq!(old, None);
+         return;
+     }
+
+     // With an invalid declaration, a template location can still serve as
+     // the key for the type.
+     let template_location = location.and_then(|location| {
+         let kind = location.kind();
+         if kind == CXCursor_ClassTemplate ||
+            kind == CXCursor_ClassTemplatePartialSpecialization {
+             Some(location)
+         } else {
+             None
+         }
+     });
+
+     match template_location {
+         Some(location) => {
+             let old = self.types.insert(location.canonical(), id);
+             debug_assert_eq!(old, None);
+         }
+         None => {
+             // This could happen, for example, with types like `int*` or
+             // similar.
+             //
+             // Fortunately, we don't care about those types being
+             // duplicated, so we can just ignore them.
+             debug!("Invalid declaration {:?} found for type {:?}",
+                    declaration, self.items.get(&id).unwrap().kind().expect_type());
+         }
+     }
+ }
+
+ // TODO: Move all this syntax crap to other part of the code.
+ /// Returns the syntax extension context wrapped by the current generation
+ /// context.
+ ///
+ /// Panics with "Not in gen phase" when no generation context is set.
+ pub fn ext_cx(&self) -> &ExtCtxt<'ctx> {
+ &self.gen_ctx.expect("Not in gen phase").0
+ }
+
+ /// Returns the span stored in this context.
+ pub fn span(&self) -> Span {
+ self.span
+ }
+
+ /// Mangles a name so it doesn't conflict with any keyword.
+ ///
+ /// A name is mangled when it is a keyword, when it is exactly `bool`, or
+ /// when it contains one of `@`, `?` or `$`. Mangling replaces each of those
+ /// three characters with `_` and appends one trailing `_`. Any other name
+ /// is returned borrowed and unchanged.
+ ///
+ /// The keyword check goes through `rust_ident_raw`, so this panics when no
+ /// generation context is set.
+ pub fn rust_mangle<'a>(&self, name: &'a str) -> Cow<'a, str> {
+     use syntax::parse::token;
+
+     /// Characters that are replaced by `_` in a mangled name.
+     fn is_replaced(c: char) -> bool {
+         c == '@' || c == '?' || c == '$'
+     }
+
+     let ident = self.rust_ident_raw(&name);
+     let needs_mangling = token::Ident(ident).is_any_keyword() ||
+                          name.contains(is_replaced) ||
+                          "bool" == name;
+     if !needs_mangling {
+         return Cow::Borrowed(name);
+     }
+
+     // Build the result in one pass with a single allocation. The replaced
+     // characters and `_` are all one byte long, so the final length is
+     // exactly `name.len() + 1`.
+     let mut mangled = String::with_capacity(name.len() + 1);
+     mangled.extend(name.chars().map(|c| if is_replaced(c) { '_' } else { c }));
+     mangled.push('_');
+     Cow::Owned(mangled)
+ }
+
+ /// Returns a mangled name as a rust identifier.
+ ///
+ /// The name is first passed through `rust_mangle` and the result is handed
+ /// to `rust_ident_raw`.
+ pub fn rust_ident(&self, name: &str) -> Ident {
+ self.rust_ident_raw(&self.rust_mangle(name))
+ }
+
+ /// Turns `name` into an identifier through the extension context's
+ /// `ident_of`, without mangling it first.
+ ///
+ /// Relies on `ext_cx()`, so it panics when no generation context is set.
+ pub fn rust_ident_raw<S>(&self, name: &S) -> Ident
+ where S: Borrow<str>,
+ {
+ self.ext_cx().ident_of(name.borrow())
+ }
+
+ /// Returns an iterator over every `(id, item)` pair in the item map. The
+ /// map is a `BTreeMap`, so the pairs are yielded in key order.
+ pub fn items<'a>(&'a self) -> btree_map::Iter<'a, ItemId, Item> {
+ self.items.iter()
+ }
+
+ /// Returns whether the unresolved type references have already been
+ /// collected, that is, whether `collect_typerefs` has run.
+ pub fn collected_typerefs(&self) -> bool {
+ self.collected_typerefs
+ }
+
+ fn collect_typerefs(&mut self) -> Vec<(ItemId, clang::Type, Option<clang::Cursor>)> {
+ debug_assert!(!self.collected_typerefs);
+ self.collected_typerefs = true;
+ let mut typerefs = vec![];
+ for (id, ref mut item) in &mut self.items {
+ let kind = item.kind();
+ let ty = match kind.as_type() {
+ Some(ty) => ty,
+ None => continue,
+ };
+
+ match *ty.kind() {
+ TypeKind::UnresolvedTypeRef(ref ty, loc) => {
+ typerefs.push((*id, ty.clone(), loc));
+ }
+ _ => {},
+ };
+ }
+ typerefs
+ }
+
+ fn resolve_typerefs(&mut self) {
+ let typerefs = self.collect_typerefs();
+
+ for (id, ty, loc) in typerefs {
+ let _resolved = {
+ let resolved = Item::from_ty(&ty, loc, None, self)
+ .expect("What happened?");
+ let mut item = self.items.get_mut(&id).unwrap();
+
+ *item.kind_mut().as_type_mut().unwrap().kind_mut() =
+ TypeKind::ResolvedTypeRef(resolved);
+ resolved
+ };
+
+ // Something in the STL is trolling me. I don't need this assertion
+ // right now, but worth investigating properly once this lands.
+ //
+ // debug_assert!(self.items.get(&resolved).is_some(), "How?");
+ }
+ }
+
+ /// Applies the replacements registered through `replace`.
+ ///
+ /// Every compound type that is not a template specialization and whose
+ /// canonical name has a registered replacement is turned into a
+ /// `ResolvedTypeRef` to the replacing item. This only happens when the
+ /// replacement is a different item that is actually present in the item
+ /// map.
+ fn process_replacements(&mut self) {
+     if self.replacements.is_empty() {
+         return;
+     }
+
+     // FIXME: This is linear, but the replaces="xxx" annotation was already
+     // there, and for better or worse it's useful, sigh...
+     //
+     // We leverage the ResolvedTypeRef thing, though, which is cool :P.
+
+     let mut pending = vec![];
+
+     for (id, item) in &self.items {
+         // canonical_name calls are expensive, so rule out everything that
+         // can't be replaced before computing the name.
+         let replaceable = item.kind()
+             .as_type()
+             .and_then(|ty| ty.as_comp())
+             .map_or(false, |ci| !ci.is_template_specialization());
+         if !replaceable {
+             continue;
+         }
+
+         let replacement = match self.replacements.get(&item.canonical_name(self)) {
+             Some(replacement) => replacement,
+             None => continue,
+         };
+
+         // We set this just after parsing the annotation. It's very
+         // unlikely, but the replacement may be missing from the map.
+         if replacement != id && self.items.contains_key(replacement) {
+             pending.push((*id, *replacement));
+         }
+     }
+
+     for (id, replacement) in pending {
+         let item = self.items.get_mut(&id).unwrap();
+         *item.kind_mut().as_type_mut().unwrap().kind_mut() =
+             TypeKind::ResolvedTypeRef(replacement);
+     }
+ }
+
+ /// Enters the generation phase and runs `cb` inside it.
+ ///
+ /// A temporary syntax extension context is created and made reachable
+ /// through `ext_cx()`. Unresolved type references are then resolved and the
+ /// replaces="xxx" replacements applied, and finally `cb` is called with the
+ /// context. Its result is returned.
+ ///
+ /// The generation context is cleared again before this function is left,
+ /// both on normal return and when `cb` or a preparation step panics.
+ pub fn gen<F, Out>(&mut self, cb: F) -> Out
+     where F: FnOnce(&Self) -> Out
+ {
+     use syntax::ext::expand::ExpansionConfig;
+     use syntax::codemap::{ExpnInfo, MacroBang, NameAndSpan};
+     use syntax::ext::base;
+     use syntax::parse;
+     use std::mem;
+
+     /// Resets `gen_ctx` to `None` when dropped, so the reference to the
+     /// function-local extension context can never be observed after the
+     /// local is gone, not even while unwinding.
+     struct GenPhaseGuard<'g, 'c: 'g>(&'g mut BindgenContext<'c>);
+
+     impl<'g, 'c: 'g> Drop for GenPhaseGuard<'g, 'c> {
+         fn drop(&mut self) {
+             self.0.gen_ctx = None;
+         }
+     }
+
+     let cfg = ExpansionConfig::default("xxx".to_owned());
+     let sess = parse::ParseSess::new();
+     let mut loader = base::DummyMacroLoader;
+     let mut ctx =
+         GenContext(base::ExtCtxt::new(&sess, vec![], cfg, &mut loader));
+
+     ctx.0.bt_push(ExpnInfo {
+         call_site: self.span,
+         callee: NameAndSpan {
+             format: MacroBang(parse::token::intern("")),
+             allow_internal_unstable: false,
+             span: None
+         }
+     });
+
+     // FIXME: This is evil, we should move code generation to use a wrapper
+     // of BindgenContext instead, I guess.
+     //
+     // SAFETY: the transmute only extends the lifetime of the reference to
+     // `ctx`. `guard` is declared after `ctx`, so it is dropped first and
+     // clears `gen_ctx` before `ctx` is destroyed, on return and on unwind
+     // alike; the extended reference is therefore never used after `ctx`
+     // has been dropped.
+     self.gen_ctx = Some(unsafe { mem::transmute(&ctx) });
+     let guard = GenPhaseGuard(self);
+
+     guard.0.resolve_typerefs();
+     guard.0.process_replacements();
+
+     let ret = cb(&*guard.0);
+     ret
+ }
+
+ /// Stores a type item in the item map only, bypassing the cursor -> type
+ /// map.
+ ///
+ /// Builtin types don't get a valid declaration, so they can't be added to
+ /// the cursor -> type map. They're not generated anyway, and are few, so
+ /// the duplication and special-casing is fine.
+ ///
+ /// If at some point we care about the memory here, a map from TypeKind to
+ /// the builtin type's ItemId would probably be the best way to improve it.
+ ///
+ /// Panics if an item with the same id has already been registered.
+ fn add_builtin_item(&mut self, item: Item) {
+     debug_assert!(item.kind().is_type());
+     let id = item.id();
+     assert!(self.items.insert(id, item).is_none(), "Inserted type twice?");
+ }
+
+ /// Creates the item for the root module, which is named "root".
+ ///
+ /// The freshly allocated id is passed to `Item::new` twice. Judging by the
+ /// other `Item::new` calls in this file, the second position is the parent
+ /// id, which would make the root module its own parent.
+ fn build_root_module() -> Item {
+ let module = Module::new(Some("root".into()));
+ let id = ItemId::next();
+ Item::new(id, None, None, id, ItemKind::Module(module))
+ }
+
+ /// Returns the id of the root module.
+ pub fn root_module(&self) -> ItemId {
+ self.root_module
+ }
+
+ /// Resolves `type_id` to the type it identifies.
+ ///
+ /// Panics if the id is not in the item map.
+ /// NOTE(review): `expect_type` presumably panics as well when the item is
+ /// not a type -- confirm against `ItemKind`.
+ pub fn resolve_type(&self, type_id: ItemId) -> &Type {
+ self.items.get(&type_id).unwrap().kind().expect_type()
+ }
+
+ /// Like `resolve_type`, but returns `None` instead of panicking when the
+ /// id is not in the item map.
+ ///
+ /// NOTE(review): an id that is present still goes through `expect_type`,
+ /// so this is presumably not safe for items that aren't types -- confirm.
+ pub fn safe_resolve_type(&self, type_id: ItemId) -> Option<&Type> {
+ self.items.get(&type_id).map(|t| t.kind().expect_type())
+ }
+
+ /// Looks up the item with the given id, returning `None` when the id is
+ /// not in the item map.
+ pub fn resolve_item_fallible(&self, item_id: ItemId) -> Option<&Item> {
+ self.items.get(&item_id)
+ }
+
+ /// Looks up the item with the given id.
+ ///
+ /// Panics, naming the offending id, when it is not in the item map.
+ pub fn resolve_item(&self, item_id: ItemId) -> &Item {
+     if let Some(item) = self.items.get(&item_id) {
+         return item;
+     }
+     panic!("Not an item: {:?}", item_id)
+ }
+
+ /// Returns the id of the module currently being traversed.
+ pub fn current_module(&self) -> ItemId {
+ self.current_module
+ }
+
+ /// This is one of the hackiest methods in all the parsing code. This method
+ /// is used to allow having templates with other argument names instead of
+ /// the canonical ones.
+ ///
+ /// This is surprisingly difficult to do with libclang, due to the fact that
+ /// partial template specializations don't provide explicit template
+ /// argument information.
+ ///
+ /// The only way to do this as far as I know, is inspecting manually the
+ /// AST, looking for TypeRefs inside. This, unfortunately, doesn't work for
+ /// more complex cases, see the comment on the assertion below.
+ ///
+ /// To see an example of what this handles:
+ ///
+ /// ```c++
+ /// template<typename T>
+ /// class Incomplete {
+ /// T p;
+ /// };
+ ///
+ /// template<typename U>
+ /// class Foo {
+ /// Incomplete<U> bar;
+ /// };
+ /// ```
+ ///
+ /// Returns the id of the new `TemplateRef` item wrapping `wrapping`, or
+ /// `wrapping` itself when the number of arguments found under `location`
+ /// differs from the number of template arguments of the wrapped type.
+ ///
+ /// Panics if `wrapping` does not resolve to a `Comp` type.
+ fn build_template_wrapper(&mut self,
+ wrapping: ItemId,
+ parent_id: ItemId,
+ ty: &clang::Type,
+ location: clang::Cursor) -> ItemId {
+ use clangll::*;
+ let mut args = vec![];
+ // NOTE(review): this flag is written below but never read in this
+ // function.
+ let mut found_invalid_template_ref = false;
+ // The id is allocated up front because the argument items created in the
+ // visitor receive it (presumably as their parent id).
+ let self_id = ItemId::next();
+ location.visit(|c, _| {
+ if c.kind() == CXCursor_TemplateRef &&
+ c.cur_type().kind() == CXType_Invalid {
+ found_invalid_template_ref = true;
+ }
+ if c.kind() == CXCursor_TypeRef {
+ let new_ty =
+ Item::from_ty_or_ref(c.cur_type(), Some(*c), Some(self_id), self);
+ args.push(new_ty);
+ }
+ CXChildVisit_Continue
+ });
+
+ let item = {
+ let wrapping_type = self.resolve_type(wrapping);
+ let old_args = match *wrapping_type.kind() {
+ TypeKind::Comp(ref ci) => ci.template_args(),
+ _ => panic!("how?"),
+ };
+ // The following assertion actually fails with partial template
+ // specialization. But as far as I know there's no way at all to
+ // grab the specialized types from neither the AST or libclang.
+ //
+ // This flaw was already on the old parser, but I now think it has
+ // no clear solution.
+ //
+ // For an easy example in which there's no way at all of getting the
+ // `int` type, except manually parsing the spelling:
+ //
+ // template<typename T, typename U>
+ // class Incomplete {
+ // T d;
+ // U p;
+ // };
+ //
+ // template<typename U>
+ // class Foo {
+ // Incomplete<U, int> bar;
+ // };
+ //
+ // debug_assert_eq!(old_args.len(), args.len());
+ //
+ // That being said, this is not so common, so just error! and hope
+ // for the best, returning the previous type, who knows.
+ if old_args.len() != args.len() {
+ error!("Found partial template specialization, expect dragons!");
+ // NOTE(review): the argument items built above were handed `self_id`,
+ // which is never inserted into the item map on this path.
+ return wrapping;
+ }
+
+ let type_kind = TypeKind::TemplateRef(wrapping, args);
+ let name = ty.spelling();
+ let name = if name.is_empty() { None } else { Some(name) };
+ let ty = Type::new(name, ty.fallible_layout().ok(), type_kind, ty.is_const());
+ Item::new(self_id, None, None, parent_id, ItemKind::Type(ty))
+ };
+
+ // Bypass all the validations in add_item explicitly.
+ self.items.insert(self_id, item);
+ self_id
+ }
+
+ /// Looks up an already resolved type, either because it's builtin, or
+ /// because we already have it in the map.
+ ///
+ /// When the type's declaration is already known, a new wrapper item is
+ /// created around the known type and its id is returned. Otherwise the
+ /// type is handed to `build_builtin_ty`, which returns `None` for anything
+ /// that isn't builtin.
+ pub fn builtin_or_resolved_ty(&mut self,
+                               parent_id: Option<ItemId>,
+                               ty: &clang::Type,
+                               location: Option<clang::Cursor>) -> Option<ItemId> {
+     use clangll::{CXCursor_ClassTemplate, CXCursor_ClassTemplatePartialSpecialization};
+     debug!("builtin_or_resolved_ty: {:?}, {:?}, {:?}", ty, location, parent_id);
+
+     let is_class_template = |cursor: &clang::Cursor| {
+         let kind = cursor.kind();
+         kind == CXCursor_ClassTemplate ||
+         kind == CXCursor_ClassTemplatePartialSpecialization
+     };
+
+     // Fall back to the location when the type has no valid declaration and
+     // the location is a class template.
+     let declaration = {
+         let from_type = ty.declaration();
+         let template_location = if from_type.is_valid() {
+             None
+         } else {
+             location.and_then(|location| {
+                 if is_class_template(&location) { Some(location) } else { None }
+             })
+         };
+         template_location.unwrap_or(from_type)
+     };
+
+     // First lookup to see if we already have it resolved.
+     let canonical_declaration = declaration.canonical();
+     let known = if canonical_declaration.is_valid() {
+         self.types.get(&canonical_declaration).cloned()
+     } else {
+         None
+     };
+
+     let id = match known {
+         Some(id) => id,
+         None => {
+             debug!("Not resolved, maybe builtin?");
+
+             // Else, build it.
+             return self.build_builtin_ty(ty, declaration);
+         }
+     };
+
+     debug!("Already resolved ty {:?}, {:?}, {:?} {:?}",
+            id, declaration, ty, location);
+
+     // If the declaration existed, we *might* be done, but it's not the
+     // case for class templates, where the template arguments may vary.
+     //
+     // In this case, we create a TemplateRef with the new template
+     // arguments, pointing to the canonical template.
+     //
+     // Note that we only do it if parent_id is some, and we have a location
+     // for building the new arguments, the template argument names don't
+     // matter in the global context.
+     let has_own_arguments = is_class_template(&declaration) &&
+                             *ty != canonical_declaration.cur_type();
+     if has_own_arguments {
+         if let (Some(location), Some(parent_id)) = (location, parent_id) {
+             return Some(self.build_template_wrapper(id, parent_id, ty, location));
+         }
+     }
+
+     Some(self.build_ty_wrapper(id, parent_id, ty))
+ }
+
+ /// Creates a new type item that is a `ResolvedTypeRef` to `wrapped_id`,
+ /// carrying the spelling, layout and constness of `ty`, and returns its id.
+ ///
+ /// The wrapper is parented to `parent_id`, or to the current module when no
+ /// parent is given.
+ ///
+ /// This is unfortunately a lot of bloat, but is needed to properly track
+ /// constness et al.
+ ///
+ /// We should probably make the constness tracking separate, so it doesn't
+ /// bloat that much, but hey, we already bloat the heck out of builtin
+ /// types.
+ fn build_ty_wrapper(&mut self,
+                     wrapped_id: ItemId,
+                     parent_id: Option<ItemId>,
+                     ty: &clang::Type) -> ItemId {
+     let id = ItemId::next();
+     let wrapper = Type::new(Some(ty.spelling()),
+                             ty.fallible_layout().ok(),
+                             TypeKind::ResolvedTypeRef(wrapped_id),
+                             ty.is_const());
+     let parent = match parent_id {
+         Some(parent) => parent,
+         None => self.current_module,
+     };
+
+     self.add_builtin_item(Item::new(id, None, None, parent, ItemKind::Type(wrapper)));
+     id
+ }
+
+ /// Creates an item for `ty` when it is one of the builtin clang types
+ /// handled below, and returns its id; returns `None` for any other kind.
+ ///
+ /// The new item is parented to the root module and stored through
+ /// `add_builtin_item`. The declaration cursor is currently unused.
+ fn build_builtin_ty(&mut self,
+                     ty: &clang::Type,
+                     _declaration: Cursor) -> Option<ItemId> {
+     use clangll::*;
+
+     let kind = match ty.kind() {
+         CXType_NullPtr => TypeKind::NullPtr,
+         CXType_Void => TypeKind::Void,
+         CXType_Bool => TypeKind::Int(IntKind::Bool),
+         CXType_Int => TypeKind::Int(IntKind::Int),
+         CXType_UInt => TypeKind::Int(IntKind::UInt),
+         CXType_SChar |
+         CXType_Char_S => TypeKind::Int(IntKind::Char),
+         CXType_UChar |
+         CXType_Char_U => TypeKind::Int(IntKind::UChar),
+         CXType_Short => TypeKind::Int(IntKind::Short),
+         CXType_UShort => TypeKind::Int(IntKind::UShort),
+         // NOTE(review): `wchar_t` is mapped to a 16-bit integer here; its
+         // width is presumably platform dependent -- confirm.
+         CXType_WChar |
+         CXType_Char16 => TypeKind::Int(IntKind::U16),
+         CXType_Char32 => TypeKind::Int(IntKind::U32),
+         CXType_Long => TypeKind::Int(IntKind::Long),
+         CXType_ULong => TypeKind::Int(IntKind::ULong),
+         CXType_LongLong => TypeKind::Int(IntKind::LongLong),
+         CXType_ULongLong => TypeKind::Int(IntKind::ULongLong),
+         CXType_Float => TypeKind::Float(FloatKind::Float),
+         CXType_Double => TypeKind::Float(FloatKind::Double),
+         CXType_LongDouble => TypeKind::Float(FloatKind::LongDouble),
+         _ => return None,
+     };
+
+     let builtin = Type::new(Some(ty.spelling()),
+                             ty.fallible_layout().ok(),
+                             kind,
+                             ty.is_const());
+     let id = ItemId::next();
+     let root = self.root_module;
+     self.add_builtin_item(Item::new(id, None, None, root, ItemKind::Type(builtin)));
+     Some(id)
+ }
+
+ /// Returns the translation unit used for parsing.
+ pub fn translation_unit(&self) -> &clang::TranslationUnit {
+ &self.translation_unit
+ }
+
+ /// Returns whether a macro with the given name has already been noted as
+ /// parsed.
+ pub fn parsed_macro(&self, macro_name: &str) -> bool {
+ self.parsed_macros.contains(macro_name)
+ }
+
+ /// Records `macro_name` as parsed.
+ ///
+ /// In debug builds this asserts that the name has not been noted before.
+ pub fn note_parsed_macro(&mut self, macro_name: String) {
+ debug_assert!(!self.parsed_macros.contains(&macro_name));
+ self.parsed_macros.insert(macro_name);
+ }
+
+ /// Returns whether a generation context is currently set, which is the
+ /// case while `gen` is running its callback.
+ pub fn in_codegen_phase(&self) -> bool {
+ self.gen_ctx.is_some()
+ }
+
+ /// Registers `potential_ty` as the replacement for the type named `name`.
+ /// A later registration for the same name overwrites the earlier one.
+ ///
+ /// This is a bit of a hack, but it's done so that using the replaces="xxx"
+ /// annotation implies hiding the other type.
+ pub fn replace(&mut self, name: &str, potential_ty: ItemId) {
+ self.replacements.insert(name.into(), potential_ty);
+ }
+
+ /// Returns whether `name` is listed in the `hidden_types` option.
+ ///
+ /// Only meant to be called during the generation phase; debug builds
+ /// assert this.
+ pub fn hidden_by_name(&self, name: &str) -> bool {
+ debug_assert!(self.in_codegen_phase(),
+ "You're not supposed to call this yet");
+ self.options.hidden_types.contains(name)
+ }
+
+ /// Returns whether `name` is listed in the `opaque_types` option.
+ ///
+ /// Only meant to be called during the generation phase; debug builds
+ /// assert this.
+ pub fn opaque_by_name(&self, name: &str) -> bool {
+ debug_assert!(self.in_codegen_phase(),
+ "You're not supposed to call this yet");
+ self.options.opaque_types.contains(name)
+ }
+
+ /// Returns the options this context was created with.
+ pub fn options(&self) -> &BindgenOptions {
+ &self.options
+ }
+
+ /// Given a CXCursor_Namespace cursor, return the item id of the
+ /// corresponding module, or create one on the fly.
+ ///
+ /// Modules are keyed by the canonical cursor of the namespace. A newly
+ /// created module is parented to the current module and remembered, so
+ /// that later calls for the same namespace return the same id.
+ ///
+ /// The module name is the second token of the namespace; the module is
+ /// left unnamed when there are fewer than two tokens or when that token
+ /// is `{`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `cursor` is not a namespace cursor.
+ pub fn module(&mut self, cursor: clang::Cursor) -> ItemId {
+     use clangll::*;
+     assert!(cursor.kind() == CXCursor_Namespace, "Be a nice person");
+     let cursor = cursor.canonical();
+     let module_id = match self.modules.get(&cursor) {
+         Some(id) => return *id,
+         None => ItemId::next(),
+     };
+
+     let module_name = self.translation_unit
+         .tokens(&cursor).and_then(|tokens| {
+             if tokens.len() <= 1 {
+                 None
+             } else {
+                 match &*tokens[1].spelling {
+                     "{" => None,
+                     s => Some(s.to_owned()),
+                 }
+             }
+         });
+
+     let module = Module::new(module_name);
+     let module = Item::new(module_id, None, None, self.current_module,
+                            ItemKind::Module(module));
+
+     // Remember the module. Without this the lookup above never hits and
+     // every visit of the same namespace would create a duplicate module.
+     self.modules.insert(cursor, module_id);
+     self.add_item(module, None, None);
+
+     module_id
+ }
+
+ /// Runs `cb` with `module_id` as the current module.
+ ///
+ /// `cb` receives the context and an initially empty list. The ids it
+ /// leaves in that list are appended to the children of the module, and the
+ /// previously current module is restored afterwards.
+ ///
+ /// Panics if `module_id` does not identify a module item.
+ pub fn with_module<F>(&mut self, module_id: ItemId, cb: F)
+     where F: FnOnce(&mut Self, &mut Vec<ItemId>)
+ {
+     debug_assert!(self.resolve_item(module_id).kind().is_module(), "Wat");
+
+     let outer_module = self.current_module;
+     self.current_module = module_id;
+
+     let mut collected = Vec::new();
+     cb(self, &mut collected);
+
+     {
+         let module = self.items
+             .get_mut(&module_id)
+             .unwrap()
+             .as_module_mut()
+             .expect("Not a module?");
+         module.children_mut().extend(collected);
+     }
+
+     self.current_module = outer_module;
+ }
+}
+
+/// This was originally a type that only exposed the resolve_type operation to
+/// its consumers.
+///
+/// Later I made resolve_type public, so... meh. It should go away soon.
+pub type TypeResolver<'ctx> = BindgenContext<'ctx>;