diff options
Diffstat (limited to 'src/ir/context.rs')
-rw-r--r-- | src/ir/context.rs | 685 |
1 files changed, 685 insertions, 0 deletions
diff --git a/src/ir/context.rs b/src/ir/context.rs new file mode 100644 index 00000000..5ddedda0 --- /dev/null +++ b/src/ir/context.rs @@ -0,0 +1,685 @@ +use super::ty::{Type, TypeKind, FloatKind}; +use super::item::{Item, ItemCanonicalName, ItemId}; +use super::item_kind::ItemKind; +use super::int::IntKind; +use super::module::Module; +use clang::{self, Cursor}; +use std::borrow::{Cow, Borrow}; +use std::collections::btree_map::{self, BTreeMap}; +use std::collections::{HashSet, HashMap}; +use std::fmt; +use syntax::ast::Ident; +use syntax::codemap::{DUMMY_SP, Span}; +use syntax::ext::base::ExtCtxt; +use parse::ClangItemParser; +use BindgenOptions; + +// This is just convenience to avoid creating a manual debug impl for the +// context. +struct GenContext<'ctx>(ExtCtxt<'ctx>); + +impl<'ctx> fmt::Debug for GenContext <'ctx> { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "GenContext {{ ... }}") + } +} + +/// A context used during parsing and generation of structs. +#[derive(Debug)] +pub struct BindgenContext<'ctx> { + /// The map of all the items parsed so far. + /// + /// It's a BTreeMap because we want the keys to be sorted to have consistent + /// output. + items: BTreeMap<ItemId, Item>, + + /// Clang cursor to type map. This is needed to be able to associate types + /// with item ids during parsing. + /// + /// The cursor used for storage is the definition cursor. + types: HashMap<Cursor, ItemId>, + + /// A cursor to module map. Similar reason than above. + modules: HashMap<Cursor, ItemId>, + + /// The root module, this is guaranteed to be an item of kind Module. + root_module: ItemId, + + /// Current module being traversed. + current_module: ItemId, + + /// A stack with the current type declarations and types we're parsing. This + /// is needed to avoid infinite recursion when parsing a type like: + /// + /// struct c { struct c* next; }; + /// + /// This means effectively, that a type has a potential ID before knowing if + /// it's a correct type. But that's not important in practice. + /// + /// We could also use the `types` HashMap, but my intention with it is that + /// only valid types and declarations end up there, and this could + /// potentially break that assumption. + /// + /// FIXME: Should not be public, though... meh. + pub currently_parsed_types: Vec<(Cursor, ItemId)>, + + /// A HashSet with all the already parsed macro names. This is done to avoid + /// hard errors while parsing duplicated macros. + parsed_macros: HashSet<String>, + + /// The active replacements collected from replaces="xxx" annotations. + replacements: HashMap<String, ItemId>, + + collected_typerefs: bool, + + /// Dummy structures for code generation. + gen_ctx: Option<&'ctx GenContext<'ctx>>, + span: Span, + + /// The clang index for parsing. + index: clang::Index, + + /// The translation unit for parsing. + translation_unit: clang::TranslationUnit, + + /// The options given by the user via cli or other medium. + options: BindgenOptions, +} + +impl<'ctx> BindgenContext<'ctx> { + pub fn new(options: BindgenOptions) -> Self { + use clangll; + + let index = clang::Index::new(false, true); + + let translation_unit = + clang::TranslationUnit::parse(&index, "", &options.clang_args, &[], + clangll::CXTranslationUnit_DetailedPreprocessingRecord); + + let root_module = Self::build_root_module(); + let mut me = BindgenContext { + items: Default::default(), + types: Default::default(), + modules: Default::default(), + root_module: root_module.id(), + current_module: root_module.id(), + currently_parsed_types: vec![], + parsed_macros: Default::default(), + replacements: Default::default(), + collected_typerefs: false, + gen_ctx: None, + span: DUMMY_SP, + index: index, + translation_unit: translation_unit, + options: options, + }; + + me.add_item(root_module, None, None); + + me + } + + pub fn add_item(&mut self, + item: Item, + declaration: Option<Cursor>, + location: Option<Cursor>) { + use clangll::{CXCursor_ClassTemplate, CXCursor_ClassTemplatePartialSpecialization}; + debug!("BindgenContext::add_item({:?}, declaration: {:?}, loc: {:?}", item, declaration, location); + debug_assert!(declaration.is_some() || !item.kind().is_type() || + item.kind().expect_type().is_builtin_or_named(), + "Adding a type without declaration?"); + + let id = item.id(); + let is_type = item.kind().is_type(); + let old_item = self.items.insert(id, item); + assert!(old_item.is_none(), "Inserted type twice?"); + + if is_type && declaration.is_some() { + let declaration = declaration.unwrap(); + debug_assert_eq!(declaration, declaration.canonical()); + if declaration.is_valid() { + let old = self.types.insert(declaration, id); + debug_assert_eq!(old, None); + } else if location.is_some() && + (location.unwrap().kind() == CXCursor_ClassTemplate || + location.unwrap().kind() == CXCursor_ClassTemplatePartialSpecialization) { + let old = self.types.insert(location.unwrap().canonical(), id); + debug_assert_eq!(old, None); + } else { + // This could happen, for example, with types like `int*` or + // similar. + // + // Fortunately, we don't care about those types being + // duplicated, so we can just ignore them. + debug!("Invalid declaration {:?} found for type {:?}", + declaration, self.items.get(&id).unwrap().kind().expect_type()); + } + } + } + + // TODO: Move all this syntax crap to other part of the code. + pub fn ext_cx(&self) -> &ExtCtxt<'ctx> { + &self.gen_ctx.expect("Not in gen phase").0 + } + + pub fn span(&self) -> Span { + self.span + } + + /// Mangles a name so it doesn't conflict with any keyword. + pub fn rust_mangle<'a>(&self, name: &'a str) -> Cow<'a, str> { + use syntax::parse::token; + let ident = self.rust_ident_raw(&name); + let token = token::Ident(ident); + if token.is_any_keyword() || + name.contains("@") || + name.contains("?") || + name.contains("$") || + "bool" == name + { + let mut s = name.to_owned(); + s = s.replace("@", "_"); + s = s.replace("?", "_"); + s = s.replace("$", "_"); + s.push_str("_"); + return Cow::Owned(s) + } + Cow::Borrowed(name) + } + + /// Returns a mangled name as a rust identifier. + pub fn rust_ident(&self, name: &str) -> Ident { + self.rust_ident_raw(&self.rust_mangle(name)) + } + + pub fn rust_ident_raw<S>(&self, name: &S) -> Ident + where S: Borrow<str>, + { + self.ext_cx().ident_of(name.borrow()) + } + + pub fn items<'a>(&'a self) -> btree_map::Iter<'a, ItemId, Item> { + self.items.iter() + } + + pub fn collected_typerefs(&self) -> bool { + self.collected_typerefs + } + + fn collect_typerefs(&mut self) -> Vec<(ItemId, clang::Type, Option<clang::Cursor>)> { + debug_assert!(!self.collected_typerefs); + self.collected_typerefs = true; + let mut typerefs = vec![]; + for (id, ref mut item) in &mut self.items { + let kind = item.kind(); + let ty = match kind.as_type() { + Some(ty) => ty, + None => continue, + }; + + match *ty.kind() { + TypeKind::UnresolvedTypeRef(ref ty, loc) => { + typerefs.push((*id, ty.clone(), loc)); + } + _ => {}, + }; + } + typerefs + } + + fn resolve_typerefs(&mut self) { + let typerefs = self.collect_typerefs(); + + for (id, ty, loc) in typerefs { + let _resolved = { + let resolved = Item::from_ty(&ty, loc, None, self) + .expect("What happened?"); + let mut item = self.items.get_mut(&id).unwrap(); + + *item.kind_mut().as_type_mut().unwrap().kind_mut() = + TypeKind::ResolvedTypeRef(resolved); + resolved + }; + + // Something in the STL is trolling me. I don't need this assertion + // right now, but worth investigating properly once this lands. + // + // debug_assert!(self.items.get(&resolved).is_some(), "How?"); + } + } + + fn process_replacements(&mut self) { + if self.replacements.is_empty() { + return; + } + + // FIXME: This is linear, but the replaces="xxx" annotation was already + // there, and for better or worse it's useful, sigh... + // + // We leverage the ResolvedTypeRef thing, though, which is cool :P. + + let mut replacements = vec![]; + + for (id, item) in self.items.iter() { + let ty = match item.kind().as_type() { + Some(ty) => ty, + None => continue, + }; + + // canonical_name calls are expensive. + let ci = match ty.as_comp() { + Some(ci) => ci, + None => continue, + }; + + if ci.is_template_specialization() { + continue; + } + + if let Some(replacement) = self.replacements.get(&item.canonical_name(self)) { + if replacement != id { + // We set this just after parsing the annotation. It's + // very unlikely, but this can happen. + if self.items.get(replacement).is_some() { + replacements.push((*id, *replacement)); + } + } + } + } + + for (id, replacement) in replacements { + let mut item = self.items.get_mut(&id).unwrap(); + *item.kind_mut().as_type_mut().unwrap().kind_mut() = + TypeKind::ResolvedTypeRef(replacement); + } + } + + // Enters in the generation phase. + pub fn gen<F, Out>(&mut self, cb: F) -> Out + where F: FnOnce(&Self) -> Out + { + use syntax::ext::expand::ExpansionConfig; + use syntax::codemap::{ExpnInfo, MacroBang, NameAndSpan}; + use syntax::ext::base; + use syntax::parse; + use std::mem; + + let cfg = ExpansionConfig::default("xxx".to_owned()); + let sess = parse::ParseSess::new(); + let mut loader = base::DummyMacroLoader; + let mut ctx = + GenContext(base::ExtCtxt::new(&sess, vec![], cfg, &mut loader)); + + ctx.0.bt_push(ExpnInfo { + call_site: self.span, + callee: NameAndSpan { + format: MacroBang(parse::token::intern("")), + allow_internal_unstable: false, + span: None + } + }); + + // FIXME: This is evil, we should move code generation to use a wrapper + // of BindgenContext instead, I guess. Even though we know it's fine + // because we remove it before the end of this function. + self.gen_ctx = Some(unsafe { mem::transmute(&ctx) }); + + self.resolve_typerefs(); + self.process_replacements(); + + let ret = cb(self); + self.gen_ctx = None; + ret + } + + // This deserves a comment. Builtin types don't get a valid declaration, so + // we can't add it to the cursor->type map. + // + // That being said, they're not generated anyway, and are few, so the + // duplication and special-casing is fine. + // + // If at some point we care about the memory here, probably a map TypeKind + // -> builtin type ItemId would be the best to improve that. + fn add_builtin_item(&mut self, item: Item) { + debug_assert!(item.kind().is_type()); + let id = item.id(); + let old_item = self.items.insert(id, item); + assert!(old_item.is_none(), "Inserted type twice?"); + } + + fn build_root_module() -> Item { + let module = Module::new(Some("root".into())); + let id = ItemId::next(); + Item::new(id, None, None, id, ItemKind::Module(module)) + } + + pub fn root_module(&self) -> ItemId { + self.root_module + } + + pub fn resolve_type(&self, type_id: ItemId) -> &Type { + self.items.get(&type_id).unwrap().kind().expect_type() + } + + pub fn safe_resolve_type(&self, type_id: ItemId) -> Option<&Type> { + self.items.get(&type_id).map(|t| t.kind().expect_type()) + } + + pub fn resolve_item_fallible(&self, item_id: ItemId) -> Option<&Item> { + self.items.get(&item_id) + } + + pub fn resolve_item(&self, item_id: ItemId) -> &Item { + match self.items.get(&item_id) { + Some(item) => item, + None => panic!("Not an item: {:?}", item_id), + } + } + + pub fn current_module(&self) -> ItemId { + self.current_module + } + + /// This is one of the hackiest methods in all the parsing code. This method + /// is used to allow having templates with another argument names instead of + /// the canonical ones. + /// + /// This is surprisingly difficult to do with libclang, due to the fact that + /// partial template specializations don't provide explicit template + /// argument information. + /// + /// The only way to do this as far as I know, is inspecting manually the + /// AST, looking for TypeRefs inside. This, unfortunately, doesn't work for + /// more complex cases, see the comment on the assertion below. + /// + /// To see an example of what this handles: + /// + /// ``` + /// template<typename T> + /// class Incomplete { + /// T p; + /// }; + /// + /// template<typename U> + /// class Foo { + /// Incomplete<U> bar; + /// }; + /// ``` + fn build_template_wrapper(&mut self, + wrapping: ItemId, + parent_id: ItemId, + ty: &clang::Type, + location: clang::Cursor) -> ItemId { + use clangll::*; + let mut args = vec![]; + let mut found_invalid_template_ref = false; + let self_id = ItemId::next(); + location.visit(|c, _| { + if c.kind() == CXCursor_TemplateRef && + c.cur_type().kind() == CXType_Invalid { + found_invalid_template_ref = true; + } + if c.kind() == CXCursor_TypeRef { + let new_ty = + Item::from_ty_or_ref(c.cur_type(), Some(*c), Some(self_id), self); + args.push(new_ty); + } + CXChildVisit_Continue + }); + + let item = { + let wrapping_type = self.resolve_type(wrapping); + let old_args = match *wrapping_type.kind() { + TypeKind::Comp(ref ci) => ci.template_args(), + _ => panic!("how?"), + }; + // The following assertion actually fails with partial template + // specialization. But as far as I know there's no way at all to + // grab the specialized types from neither the AST or libclang. + // + // This flaw was already on the old parser, but I now think it has + // no clear solution. + // + // For an easy example in which there's no way at all of getting the + // `int` type, except manually parsing the spelling: + // + // template<typename T, typename U> + // class Incomplete { + // T d; + // U p; + // }; + // + // template<typename U> + // class Foo { + // Incomplete<U, int> bar; + // }; + // + // debug_assert_eq!(old_args.len(), args.len()); + // + // That being said, this is not so common, so just error! and hope + // for the best, returning the previous type, who knows. + if old_args.len() != args.len() { + error!("Found partial template specialization, expect dragons!"); + return wrapping; + } + + let type_kind = TypeKind::TemplateRef(wrapping, args); + let name = ty.spelling(); + let name = if name.is_empty() { None } else { Some(name) }; + let ty = Type::new(name, ty.fallible_layout().ok(), type_kind, ty.is_const()); + Item::new(self_id, None, None, parent_id, ItemKind::Type(ty)) + }; + + // Bypass all the validations in add_item explicitly. + self.items.insert(self_id, item); + self_id + } + + /// Looks up for an already resolved type, either because it's builtin, or + /// because we already have it in the map. + pub fn builtin_or_resolved_ty(&mut self, + parent_id: Option<ItemId>, + ty: &clang::Type, + location: Option<clang::Cursor>) -> Option<ItemId> { + use clangll::{CXCursor_ClassTemplate, CXCursor_ClassTemplatePartialSpecialization}; + debug!("builtin_or_resolved_ty: {:?}, {:?}, {:?}", ty, location, parent_id); + let mut declaration = ty.declaration(); + if !declaration.is_valid() { + if let Some(location) = location { + if location.kind() == CXCursor_ClassTemplate || + location.kind() == CXCursor_ClassTemplatePartialSpecialization { + declaration = location; + } + } + } + let canonical_declaration = declaration.canonical(); + if canonical_declaration.is_valid() { + // First lookup to see if we already have it resolved. + let id = self.types.get(&canonical_declaration).map(|id| *id); + if let Some(id) = id { + debug!("Already resolved ty {:?}, {:?}, {:?} {:?}", + id, declaration, ty, location); + // If the declaration existed, we *might* be done, but it's not + // the case for class templates, where the template arguments + // may vary. + // + // In this case, we create a TemplateRef with the new template + // arguments, pointing to the canonical template. + // + // Note that we only do it if parent_id is some, and we have a + // location for building the new arguments, the template + // argument names don't matter in the global context. + if (declaration.kind() == CXCursor_ClassTemplate || + declaration.kind() == CXCursor_ClassTemplatePartialSpecialization) && + *ty != canonical_declaration.cur_type() && + location.is_some() && parent_id.is_some() { + return Some( + self.build_template_wrapper(id, parent_id.unwrap(), ty, + location.unwrap())); + } + + return Some(self.build_ty_wrapper(id, parent_id, ty)); + } + } + + debug!("Not resolved, maybe builtin?"); + + // Else, build it. + self.build_builtin_ty(ty, declaration) + } + + // This is unfortunately a lot of bloat, but is needed to properly track + // constness et. al. + // + // We should probably make the constness tracking separate, so it doesn't + // bloat that much, but hey, we already bloat the heck out of builtin types. + fn build_ty_wrapper(&mut self, + wrapped_id: ItemId, + parent_id: Option<ItemId>, + ty: &clang::Type) -> ItemId { + let id = ItemId::next(); + let spelling = ty.spelling(); + let is_const = ty.is_const(); + let layout = ty.fallible_layout().ok(); + let type_kind = TypeKind::ResolvedTypeRef(wrapped_id); + let ty = Type::new(Some(spelling), layout, type_kind, is_const); + let item = Item::new(id, None, None, + parent_id.unwrap_or(self.current_module), ItemKind::Type(ty)); + self.add_builtin_item(item); + id + } + + fn build_builtin_ty(&mut self, + ty: &clang::Type, + _declaration: Cursor) -> Option<ItemId> { + use clangll::*; + let type_kind = match ty.kind() { + CXType_NullPtr => TypeKind::NullPtr, + CXType_Void => TypeKind::Void, + CXType_Bool => TypeKind::Int(IntKind::Bool), + CXType_Int => TypeKind::Int(IntKind::Int), + CXType_UInt => TypeKind::Int(IntKind::UInt), + CXType_SChar | + CXType_Char_S => TypeKind::Int(IntKind::Char), + CXType_UChar | + CXType_Char_U => TypeKind::Int(IntKind::UChar), + CXType_Short => TypeKind::Int(IntKind::Short), + CXType_UShort => TypeKind::Int(IntKind::UShort), + CXType_WChar | + CXType_Char16 => TypeKind::Int(IntKind::U16), + CXType_Char32 => TypeKind::Int(IntKind::U32), + CXType_Long => TypeKind::Int(IntKind::Long), + CXType_ULong => TypeKind::Int(IntKind::ULong), + CXType_LongLong => TypeKind::Int(IntKind::LongLong), + CXType_ULongLong => TypeKind::Int(IntKind::ULongLong), + CXType_Float => TypeKind::Float(FloatKind::Float), + CXType_Double => TypeKind::Float(FloatKind::Double), + CXType_LongDouble => TypeKind::Float(FloatKind::LongDouble), + _ => return None, + }; + + let spelling = ty.spelling(); + let is_const = ty.is_const(); + let layout = ty.fallible_layout().ok(); + let ty = Type::new(Some(spelling), layout, type_kind, is_const); + let id = ItemId::next(); + let item = Item::new(id, None, None, self.root_module, ItemKind::Type(ty)); + self.add_builtin_item(item); + Some(id) + } + + pub fn translation_unit(&self) -> &clang::TranslationUnit { + &self.translation_unit + } + + pub fn parsed_macro(&self, macro_name: &str) -> bool { + self.parsed_macros.contains(macro_name) + } + + pub fn note_parsed_macro(&mut self, macro_name: String) { + debug_assert!(!self.parsed_macros.contains(¯o_name)); + self.parsed_macros.insert(macro_name); + } + + pub fn in_codegen_phase(&self) -> bool { + self.gen_ctx.is_some() + } + + /// This is a bit of a hack, but it's done so using the replaces="xxx" + /// annotation implies hide in the other type. + pub fn replace(&mut self, name: &str, potential_ty: ItemId) { + self.replacements.insert(name.into(), potential_ty); + } + + pub fn hidden_by_name(&self, name: &str) -> bool { + debug_assert!(self.in_codegen_phase(), + "You're not supposed to call this yet"); + self.options.hidden_types.contains(name) + } + + pub fn opaque_by_name(&self, name: &str) -> bool { + debug_assert!(self.in_codegen_phase(), + "You're not supposed to call this yet"); + self.options.opaque_types.contains(name) + } + + pub fn options(&self) -> &BindgenOptions { + &self.options + } + + /// Given a CXCursor_Namespace cursor, return the item id of the + /// corresponding module, or create one on the fly. + pub fn module(&mut self, cursor: clang::Cursor) -> ItemId { + use clangll::*; + assert!(cursor.kind() == CXCursor_Namespace, "Be a nice person"); + let cursor = cursor.canonical(); + let module_id = match self.modules.get(&cursor) { + Some(id) => return *id, + None => ItemId::next(), + }; + + let module_name = self.translation_unit + .tokens(&cursor).and_then(|tokens| { + if tokens.len() <= 1 { + None + } else { + match &*tokens[1].spelling { + "{" => None, + s => Some(s.to_owned()), + } + } + }); + + let module = Module::new(module_name); + let module = Item::new(module_id, None, None, self.current_module, + ItemKind::Module(module)); + + self.add_item(module, None, None); + + module_id + } + + pub fn with_module<F>(&mut self, module_id: ItemId, cb: F) + where F: FnOnce(&mut Self, &mut Vec<ItemId>) + { + debug_assert!(self.resolve_item(module_id).kind().is_module(), "Wat"); + + let previous_id = self.current_module; + self.current_module = module_id; + + let mut children = vec![]; + cb(self, &mut children); + + self.items.get_mut(&module_id).unwrap() + .as_module_mut().expect("Not a module?") + .children_mut().extend(children.into_iter()); + + self.current_module = previous_id; + } +} + +/// This was originally a type that only exposes the resolve_type operation to +/// its consumers. +/// +/// Later a made resolve_type public, so... meh. It should go away soon. +pub type TypeResolver<'ctx> = BindgenContext<'ctx>; |