ir: A bit more documentation in parts of the `ir` module.

author: Emilio Cobos Álvarez <ecoal95@gmail.com> 2016-10-18 23:31:14 +0200
committer: Emilio Cobos Álvarez <ecoal95@gmail.com> 2016-10-18 23:31:14 +0200
commit: f423873cf3736e4e9c20e99ebf9886a84e5585f3 (patch)
tree: 813aa3a9eb93124ff9a7fd655f615e6e058ba7a8 /src
parent: d2291c77b84cafe5ed2ebe3e6c0c5c649f77e188 (diff)
4 files changed, 245 insertions, 89 deletions
diff --git a/src/ir/item.rs b/src/ir/item.rs
index 4808806b..da0e6aba 100644
--- a/src/ir/item.rs
+++ b/src/ir/item.rs
@@ -42,7 +42,7 @@ pub trait ItemCanonicalPath {
 
 /// A single identifier for an item.
 ///
-/// TODO: Build stronger abstractions on top of this, like TypeId(ItemId), ...
+/// TODO: Build stronger abstractions on top of this, like TypeId(ItemId)?
 #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct ItemId(usize);
 
@@ -78,6 +78,20 @@ impl ItemCanonicalPath for ItemId {
     }
 }
 
+/// An item is the base of the bindgen representation, it can be either a
+/// module, a type, a function, or a variable (see `ItemKind` for more
+/// information).
+///
+/// Items form a tree, and each item only stores the id of the parent.
+///
+/// The root of this tree is the "root module", a meta-item used to hold all the
+/// top-level items.
+///
+/// An item may have a comment, and annotations (see the `annotations` module).
+///
+/// Note that even though we parse all the types of annotations in comments, not
+/// all of them apply to every item. Those rules are described in the
+/// `annotations` module.
 #[derive(Debug)]
 pub struct Item {
     /// This item's id.
@@ -133,6 +147,24 @@ impl Item {
         &mut self.kind
     }
 
+    /// Returns whether this item is a top-level item, from the point of view of
+    /// bindgen.
+    ///
+    /// This point of view changes depending on whether namespaces are enabled
+    /// or not. That way, in the following example:
+    ///
+    /// ```c++
+    /// namespace foo {
+    ///     static int var;
+    /// }
+    /// ```
+    ///
+    /// `var` would be a top-level item if namespaces are disabled, but won't be
+    /// one if they are enabled.
+    ///
+    /// This function is used to determine when the codegen phase should call
+    /// `codegen` on an item, since it's assumed that any item that is not
+    /// top-level will be generated by its parent.
     pub fn is_toplevel(&self, ctx: &BindgenContext) -> bool {
         // FIXME: Workaround for some types falling behind when parsing weird
         // stl classes, for example.
@@ -167,12 +199,55 @@ impl Item {
         self.kind().expect_function()
     }
 
-    // This check is needed because even though the type might not contain the
-    // applicable template args itself, they might apply transitively via, for
-    // example, the parent.
-    //
-    // It's kind of unfortunate (in the sense that it's a sort of complex
-    // process, but I think it gets all the cases).
+    /// Checks whether an item contains in its "type signature" some named type.
+    ///
+    /// This function is used to avoid unused template parameter errors in Rust
+    /// when generating typedef declarations, and also to know whether we need
+    /// to generate a PhantomData member for a template parameter.
+    ///
+    /// For example, in code like the following:
+    ///
+    /// ```c++
+    /// template<typename T, typename U>
+    /// struct Foo {
+    ///     T bar;
+    ///
+    ///     struct Baz {
+    ///         U bas;
+    ///     };
+    /// };
+    /// ```
+    ///
+    /// Both Foo and Baz contain both `T` and `U` template parameters in their
+    /// signature:
+    ///
+    ///  * `Foo<T, U>`
+    ///  * `Baz<T, U>`
+    ///
+    /// But the structure for `Foo` would look like:
+    ///
+    /// ```rust
+    /// struct Foo<T, U> {
+    ///     bar: T,
+    ///     _phantom0: ::std::marker::PhantomData<U>,
+    /// }
+    /// ```
+    ///
+    /// because none of its member fields contained the `U` type in the
+    /// signature. Similarly, `Baz` would contain a `PhantomData<T>` type, for
+    /// the same reason.
+    ///
+    /// Note that this is somewhat similar to `applicable_template_args`, but
+    /// this also takes into account other kinds of types, like arrays
+    /// (`[T; 40]`), pointers (`*mut T`), etc.
+    ///
+    /// Normally we could do this check just in the `Type` kind, but we also
+    /// need to check the `applicable_template_args` more generally, since we
+    /// could need a type transitively from our parent, see the test added in
+    /// <https://github.com/servo/rust-bindgen/pull/85/commits/2a3f93074dd2898669dbbce6e97e5cc4405d7cb1>
+    ///
+    /// It's kind of unfortunate (in the sense that it's a sort of complex
+    /// process), but I think it should get all the cases.
     fn signature_contains_named_type(&self, ctx: &BindgenContext, ty: &Type) -> bool {
         debug_assert!(ty.is_named());
         self.expect_type().signature_contains_named_type(ctx, ty) ||
@@ -181,6 +256,39 @@ impl Item {
             })
     }
 
+    /// Returns the template arguments that apply to a struct. This is a concept
+    /// needed because of type declarations inside templates, for example:
+    ///
+    /// ```c++
+    /// template<typename T>
+    /// class Foo {
+    ///     typedef T element_type;
+    ///     typedef int Bar;
+    ///
+    ///     template<typename U>
+    ///     class Baz {
+    ///     };
+    /// };
+    /// ```
+    ///
+    /// In this case, the applicable template arguments for the different types
+    /// would be:
+    ///
+    ///  * `Foo`: [`T`]
+    ///  * `Foo::element_type`: [`T`]
+    ///  * `Foo::Bar`: [`T`]
+    ///  * `Foo::Baz`: [`T`, `U`]
+    ///
+    /// You might notice that we can't generate something like:
+    ///
+    /// ```rust,ignore
+    /// type Foo_Bar<T> = ::std::os::raw::c_int;
+    /// ```
+    ///
+    /// since that would be invalid Rust. Still, conceptually, `Bar` *could* use
+    /// the template parameter type `T`, and that's exactly what this method
+    /// represents. The unused template parameters get stripped in the
+    /// `signature_contains_named_type` check.
     pub fn applicable_template_args(&self, ctx: &BindgenContext) -> Vec<ItemId> {
         let ty = match *self.kind() {
             ItemKind::Type(ref ty) => ty,
@@ -275,6 +383,15 @@ impl Item {
 
     /// Get the canonical name without taking into account the replaces
     /// annotation.
+    ///
+    /// This is the base logic used to implement hiding and replacing via
+    /// annotations, and also to implement proper name mangling.
+    ///
+    /// The idea is that each generated type in the same "level" (read: module
+    /// or namespace) has a unique canonical name.
+    ///
+    /// This name should be derived from the immutable state contained in the
+    /// type and the parent chain, since it should be consistent.
     fn real_canonical_name(&self,
                            ctx: &BindgenContext,
                            count_namespaces: bool,
@@ -424,12 +541,6 @@ impl ClangItemParser for Item {
         let comment = cursor.raw_comment();
         let annotations = Annotations::new(&cursor);
 
-        // FIXME: The current_module logic is not really accurate. We should be
-        // able to index modules by their Cursor, and locate the proper module
-        // for a given item.
-        //
-        // We don't support modules properly though, so there's no rush for
-        // this.
         let current_module = context.current_module();
         macro_rules! try_parse {
             ($what:ident) => {
@@ -486,7 +597,8 @@ impl ClangItemParser for Item {
         if cursor.kind() == clangll::CXCursor_UnexposedDecl {
             Err(ParseError::Recurse)
         } else {
-            error!("Unhandled cursor kind: {} ({})", ::clang::kind_to_str(cursor.kind()), cursor.kind());
+            error!("Unhandled cursor kind: {} ({})",
+                   ::clang::kind_to_str(cursor.kind()), cursor.kind());
             Err(ParseError::Continue)
         }
     }
@@ -498,6 +610,17 @@ impl ClangItemParser for Item {
         Self::from_ty_or_ref_with_id(ItemId::next(), ty, location, parent_id, context)
     }
 
+    /// Parse a type, if we know it beforehand, or otherwise store it as an
+    /// `UnresolvedTypeRef`, which means something like "a reference to a type
+    /// we still don't know".
+    ///
+    /// This logic is needed to avoid parsing items with the incorrect parent
+    /// and it's sort of complex to explain, so I'll just point to
+    /// `tests/headers/typeref.hpp` to see the kind of constructs that forced
+    /// this.
+    ///
+    /// Typerefs are resolved once parsing is completely done, see
+    /// `BindgenContext::resolve_typerefs`.
     fn from_ty_or_ref_with_id(potential_id: ItemId,
                               ty: clang::Type,
                               location: Option<clang::Cursor>,
@@ -537,6 +660,14 @@ impl ClangItemParser for Item {
         Self::from_ty_with_id(ItemId::next(), ty, location, parent_id, context)
     }
 
+    /// This is one of the trickiest methods you'll find (probably along with
+    /// some of the ones that handle templates in `BindgenContext`).
+    ///
+    /// This method parses a type, given the potential id of that type (if
+    /// parsing it was correct), an optional location we're scanning, which is
+    /// sometimes critical to obtain information, an optional parent item id,
+    /// that will, if it's `None`, become the current module id, and the
+    /// context.
     fn from_ty_with_id(id: ItemId,
                        ty: &clang::Type,
                        location: Option<clang::Cursor>,
diff --git a/src/ir/item_kind.rs b/src/ir/item_kind.rs
index b6f317a7..a47d23a3 100644
--- a/src/ir/item_kind.rs
+++ b/src/ir/item_kind.rs
@@ -15,6 +15,7 @@ pub enum ItemKind {
 
     /// A function or method declaration.
     Function(Function),
+
     /// A variable declaration, most likely a static.
     Var(Var),
 }
diff --git a/src/ir/mod.rs b/src/ir/mod.rs
index 07ac3059..1f7c3130 100644
--- a/src/ir/mod.rs
+++ b/src/ir/mod.rs
@@ -1,3 +1,6 @@
+//! The module where the Intermediate Representation bindgen uses, and the
+//! parsing code that generates it, live.
+
 pub mod annotations;
 pub mod comp;
 pub mod context;
diff --git a/src/ir/ty.rs b/src/ir/ty.rs
index 0cf8174e..6a2701fa 100644
--- a/src/ir/ty.rs
+++ b/src/ir/ty.rs
@@ -8,16 +8,17 @@ use super::context::BindgenContext;
 use parse::{ClangItemParser, ParseResult, ParseError};
 use clang::{self, Cursor};
 
+/// The base representation of a type in bindgen.
+///
+/// A type has an optional name, that can't be empty, a `layout` (size,
+/// alignment and packedness) if known, a `Kind`, which determines which kind of
+/// type it is, and whether the type is const.
 #[derive(Debug)]
 pub struct Type {
     /// The name of the type, or None if it was an unnamed struct or union.
     name: Option<String>,
     /// The layout of the type, if known.
     layout: Option<Layout>,
-    /// Whether this type is marked as opaque.
-    opaque: bool,
-    /// Whether this type is marked as hidden.
-    hide: bool,
     /// The inner kind of the type
     kind: TypeKind,
     /// Whether this type is const-qualified.
@@ -41,8 +42,6 @@ impl Type {
         Type {
             name: name,
             layout: layout,
-            opaque: false,
-            hide: false,
             kind: kind,
             is_const: is_const,
         }
@@ -116,134 +115,142 @@ impl Type {
         self.is_const
     }
 
-    pub fn layout(&self, type_resolver: &BindgenContext) -> Option<Layout> {
+    pub fn layout(&self, ctx: &BindgenContext) -> Option<Layout> {
         use std::mem;
 
         self.layout.or_else(|| {
             match self.kind {
                 TypeKind::Comp(ref ci)
-                    => ci.layout(type_resolver),
+                    => ci.layout(ctx),
                 // FIXME(emilio): This is a hack for anonymous union templates.
                 // Use the actual pointer size!
                 TypeKind::Pointer(..) |
                 TypeKind::BlockPointer
                     => Some(Layout::new(mem::size_of::<*mut ()>(), mem::align_of::<*mut ()>())),
                 TypeKind::ResolvedTypeRef(inner)
-                    => type_resolver.resolve_type(inner).layout(type_resolver),
+                    => ctx.resolve_type(inner).layout(ctx),
                 _ => None,
             }
         })
     }
 
-    pub fn is_opaque(&self, _type_resolver: &BindgenContext) -> bool {
-        self.opaque
-    }
-
-    pub fn can_derive_debug(&self, type_resolver: &BindgenContext) -> bool {
-        !self.is_opaque(type_resolver) && match self.kind {
+    /// Whether we can derive the `Debug` trait for this type in Rust. This
+    /// should ideally be a no-op that just returns `true`, but instead needs
+    /// to be a recursive method that checks whether all the proper members can
+    /// derive `Debug` or not, because of the limit Rust has of 32 items as the
+    /// maximum array length.
+    pub fn can_derive_debug(&self, ctx: &BindgenContext) -> bool {
+        match self.kind {
             TypeKind::Array(t, len) => {
                 len <= RUST_DERIVE_IN_ARRAY_LIMIT &&
-                type_resolver.resolve_type(t).can_derive_debug(type_resolver)
+                ctx.resolve_type(t).can_derive_debug(ctx)
             }
             TypeKind::ResolvedTypeRef(t) |
             TypeKind::TemplateAlias(t, _) |
             TypeKind::Alias(_, t) => {
-                type_resolver.resolve_type(t).can_derive_debug(type_resolver)
+                ctx.resolve_type(t).can_derive_debug(ctx)
             }
             TypeKind::Comp(ref info) => {
-                info.can_derive_debug(type_resolver, self.layout(type_resolver))
+                info.can_derive_debug(ctx, self.layout(ctx))
             }
             _ => true,
         }
     }
 
-    // For some reason, deriving copies of an array of a type that is not known
-    // to be copy is a compile error. e.g.:
-    //
-    // #[derive(Copy)]
-    // struct A<T> {
-    //     member: T,
-    // }
-    //
-    // is fine, while:
-    //
-    // #[derive(Copy)]
-    // struct A<T> {
-    //     member: [T; 1],
-    // }
-    //
-    // is an error.
-    //
-    // That's the point of the existence of can_derive_copy_in_array().
-    pub fn can_derive_copy_in_array(&self, type_resolver: &BindgenContext, item: &Item) -> bool {
+    /// For some reason, deriving copies of an array of a type that is not known
+    /// to be copy is a compile error. e.g.:
+    ///
+    /// ```rust
+    /// #[derive(Copy, Clone)]
+    /// struct A<T> {
+    ///     member: T,
+    /// }
+    /// ```
+    ///
+    /// is fine, while:
+    ///
+    /// ```rust,ignore
+    /// #[derive(Copy, Clone)]
+    /// struct A<T> {
+    ///     member: [T; 1],
+    /// }
+    /// ```
+    ///
+    /// is an error.
+    ///
+    /// That's the whole point of the existence of `can_derive_copy_in_array`.
+    pub fn can_derive_copy_in_array(&self, ctx: &BindgenContext, item: &Item) -> bool {
         match self.kind {
             TypeKind::ResolvedTypeRef(t) |
             TypeKind::TemplateAlias(t, _) |
             TypeKind::Alias(_, t) |
             TypeKind::Array(t, _) => {
-                type_resolver.resolve_item(t)
-                             .can_derive_copy_in_array(type_resolver)
+                ctx.resolve_item(t)
+                             .can_derive_copy_in_array(ctx)
             }
             TypeKind::Named(..) => false,
-            _ => self.can_derive_copy(type_resolver, item),
+            _ => self.can_derive_copy(ctx, item),
         }
     }
 
-    pub fn can_derive_copy(&self, type_resolver: &BindgenContext, item: &Item) -> bool {
-        !self.is_opaque(type_resolver) && match self.kind {
+    /// Whether we'd be able to derive the `Copy` trait in Rust or not. Same
+    /// rationale as `can_derive_debug`.
+    pub fn can_derive_copy(&self, ctx: &BindgenContext, item: &Item) -> bool {
+        match self.kind {
             TypeKind::Array(t, len) => {
                 len <= RUST_DERIVE_IN_ARRAY_LIMIT &&
-                type_resolver.resolve_item(t).can_derive_copy_in_array(type_resolver)
+                ctx.resolve_item(t).can_derive_copy_in_array(ctx)
             }
             TypeKind::ResolvedTypeRef(t) |
             TypeKind::TemplateAlias(t, _) |
             TypeKind::TemplateRef(t, _) |
             TypeKind::Alias(_, t) => {
-                type_resolver.resolve_item(t).can_derive_copy(type_resolver)
+                ctx.resolve_item(t).can_derive_copy(ctx)
             }
             TypeKind::Comp(ref info) => {
-                info.can_derive_copy(type_resolver, item)
+                info.can_derive_copy(ctx, item)
             }
             _ => true,
         }
     }
 
-    pub fn has_vtable(&self, type_resolver: &BindgenContext) -> bool {
+    /// Whether this type has a vtable.
+    pub fn has_vtable(&self, ctx: &BindgenContext) -> bool {
         // FIXME: Can we do something about template parameters? Huh...
         match self.kind {
             TypeKind::TemplateRef(t, _) |
             TypeKind::TemplateAlias(t, _) |
             TypeKind::Alias(_, t) |
-            TypeKind::ResolvedTypeRef(t) |
-            TypeKind::Array(t, _) => {
-                type_resolver.resolve_type(t).has_vtable(type_resolver)
+            TypeKind::ResolvedTypeRef(t) => {
+                ctx.resolve_type(t).has_vtable(ctx)
             }
             TypeKind::Comp(ref info) => {
-                info.has_vtable(type_resolver)
+                info.has_vtable(ctx)
             }
             _ => false,
         }
 
     }
 
-    pub fn has_destructor(&self, type_resolver: &BindgenContext) -> bool {
-        self.is_opaque(type_resolver) || match self.kind {
+    /// Returns whether this type has a destructor.
+    pub fn has_destructor(&self, ctx: &BindgenContext) -> bool {
+        match self.kind {
             TypeKind::TemplateRef(t, _) |
             TypeKind::TemplateAlias(t, _) |
             TypeKind::Alias(_, t) |
-            TypeKind::ResolvedTypeRef(t) |
-            TypeKind::Array(t, _) => {
-                type_resolver.resolve_type(t).has_destructor(type_resolver)
+            TypeKind::ResolvedTypeRef(t) => {
+                ctx.resolve_type(t).has_destructor(ctx)
             }
             TypeKind::Comp(ref info) => {
-                info.has_destructor(type_resolver)
+                info.has_destructor(ctx)
             }
             _ => false,
         }
     }
 
+    /// See the comment in `Item::signature_contains_named_type`.
     pub fn signature_contains_named_type(&self,
-                                         type_resolver: &BindgenContext,
+                                         ctx: &BindgenContext,
                                          ty: &Type) -> bool {
         debug_assert!(ty.is_named());
         let name = match *ty.kind() {
@@ -258,30 +265,35 @@ impl Type {
             TypeKind::Array(t, _) |
             TypeKind::Pointer(t) |
             TypeKind::Alias(_, t)
-                => type_resolver.resolve_type(t)
-                                .signature_contains_named_type(type_resolver, ty),
+                => ctx.resolve_type(t)
+                                .signature_contains_named_type(ctx, ty),
             TypeKind::Function(ref sig) => {
                 sig.argument_types().iter().any(|&(_, arg)| {
-                    type_resolver.resolve_type(arg)
-                                 .signature_contains_named_type(type_resolver, ty)
+                    ctx.resolve_type(arg)
+                                 .signature_contains_named_type(ctx, ty)
                 }) ||
-                type_resolver.resolve_type(sig.return_type())
-                             .signature_contains_named_type(type_resolver, ty)
+                ctx.resolve_type(sig.return_type())
+                             .signature_contains_named_type(ctx, ty)
             },
             TypeKind::TemplateAlias(_, ref template_args) |
             TypeKind::TemplateRef(_, ref template_args) => {
                 template_args.iter().any(|arg| {
-                    type_resolver.resolve_type(*arg)
-                                 .signature_contains_named_type(type_resolver, ty)
+                    ctx.resolve_type(*arg)
+                                 .signature_contains_named_type(ctx, ty)
                 })
             }
             TypeKind::Comp(ref ci)
-                => ci.signature_contains_named_type(type_resolver, ty),
+                => ci.signature_contains_named_type(ctx, ty),
             _   => false,
         }
     }
 
-    pub fn canonical_type<'tr>(&'tr self, type_resolver: &'tr BindgenContext) -> &'tr Type {
+    /// Returns the canonical type of this type, that is, the "inner type".
+    ///
+    /// For example, for a `typedef`, the canonical type would be the
+    /// `typedef`ed type; for a template specialization, it would be the
+    /// template it's specializing, and so on.
+    pub fn canonical_type<'tr>(&'tr self, ctx: &'tr BindgenContext) -> &'tr Type {
         match self.kind {
             TypeKind::Named(..) |
             TypeKind::Array(..) |
@@ -300,7 +312,7 @@ impl Type {
             TypeKind::Alias(_, inner) |
             TypeKind::TemplateAlias(inner, _) |
             TypeKind::TemplateRef(inner, _)
-                => type_resolver.resolve_type(inner).canonical_type(type_resolver),
+                => ctx.resolve_type(inner).canonical_type(ctx),
 
             TypeKind::UnresolvedTypeRef(..)
                 => unreachable!("Should have been resolved after parsing!"),
@@ -308,6 +320,7 @@ impl Type {
     }
 }
 
+/// The kind of float this type represents.
 #[derive(Debug, Copy, Clone, PartialEq)]
 pub enum FloatKind {
     Float,
@@ -316,9 +329,6 @@ pub enum FloatKind {
 }
 
 /// The different kinds of types that we can parse.
-///
-/// TODO: The name in the Alias and Named kinds is a bit unsound, should be in
-/// type.name?
 #[derive(Debug)]
 pub enum TypeKind {
     /// The void type.
@@ -371,19 +381,25 @@ pub enum TypeKind {
 }
 
 impl Type {
-    pub fn is_unsized(&self, type_resolver: &BindgenContext) -> bool {
+    /// Whether this type is unsized, that is, has no members. This is used to
+    /// derive whether we should generate a dummy `_address` field for structs,
+    /// to comply with the C and C++ layouts, which specify that every type
+    /// needs to be addressable.
+    pub fn is_unsized(&self, ctx: &BindgenContext) -> bool {
+        debug_assert!(ctx.in_codegen_phase(), "Not yet");
+
         match self.kind {
             TypeKind::Void => true,
-            TypeKind::Comp(ref ci) => ci.is_unsized(type_resolver),
+            TypeKind::Comp(ref ci) => ci.is_unsized(ctx),
             TypeKind::Array(inner, size) => {
                 size == 0 ||
-                type_resolver.resolve_type(inner).is_unsized(type_resolver)
+                ctx.resolve_type(inner).is_unsized(ctx)
             }
             TypeKind::ResolvedTypeRef(inner) |
             TypeKind::Alias(_, inner) |
             TypeKind::TemplateAlias(inner, _) |
             TypeKind::TemplateRef(inner, _)
-                => type_resolver.resolve_type(inner).is_unsized(type_resolver),
+                => ctx.resolve_type(inner).is_unsized(ctx),
             TypeKind::Named(..) |
             TypeKind::Int(..) |
             TypeKind::Float(..) |
@@ -399,6 +415,11 @@ impl Type {
         }
     }
 
+    /// This is another of the nasty methods. This one is the one that takes
+    /// care of the core logic of converting a clang type to a `Type`.
+    ///
+    /// It's sort of nasty and full of special-casing, but hopefully the
+    /// comments in every special case justify why they're there.
     pub fn from_clang_ty(potential_id: ItemId,
                          ty: &clang::Type,
                          location: Option<Cursor>,
author	Emilio Cobos Álvarez <ecoal95@gmail.com>	2016-10-18 23:31:14 +0200
committer	Emilio Cobos Álvarez <ecoal95@gmail.com>	2016-10-18 23:31:14 +0200
commit	f423873cf3736e4e9c20e99ebf9886a84e5585f3 (patch)
tree	813aa3a9eb93124ff9a7fd655f615e6e058ba7a8 /src
parent	d2291c77b84cafe5ed2ebe3e6c0c5c649f77e188 (diff)