diff options
-rw-r--r-- | src/clang.rs | 172 | ||||
-rw-r--r-- | src/ir/context.rs | 18 | ||||
-rw-r--r-- | src/ir/var.rs | 4 |
3 files changed, 125 insertions, 69 deletions
diff --git a/src/clang.rs b/src/clang.rs index a31cba31..8a3e3c68 100644 --- a/src/clang.rs +++ b/src/clang.rs @@ -507,11 +507,9 @@ impl Cursor { let mut found_attr = false; self.visit(|cur| { if cur.kind() == CXCursor_UnexposedAttr { - found_attr = cur.tokens().map(|tokens| { - tokens.iter().any(|t| { - t.kind == CXToken_Identifier && t.spelling == attr - }) - }).unwrap_or(false); + found_attr = cur.tokens().iter().any(|t| { + t.kind == CXToken_Identifier && t.spelling() == attr.as_bytes() + }); if found_attr { return CXChildVisit_Break; @@ -653,64 +651,126 @@ impl Cursor { } /// Gets the tokens that correspond to that cursor. - pub fn tokens(&self) -> Option<Vec<Token>> { - let range = self.extent(); - let mut tokens = vec![]; - unsafe { - let tu = clang_Cursor_getTranslationUnit(self.x); - let mut token_ptr = ptr::null_mut(); - let mut num_tokens: c_uint = 0; - clang_tokenize(tu, range, &mut token_ptr, &mut num_tokens); - if token_ptr.is_null() { - return None; - } + pub fn tokens(&self) -> RawTokens { + RawTokens::new(self) + } - let token_array = - slice::from_raw_parts(token_ptr, num_tokens as usize); - for &token in token_array.iter() { - let kind = clang_getTokenKind(token); - let spelling = - cxstring_into_string(clang_getTokenSpelling(tu, token)); + /// Gets the tokens that correspond to that cursor as `cexpr` tokens. + pub fn cexpr_tokens(self) -> Vec<cexpr::token::Token> { + use cexpr::token; - tokens.push(Token { - kind: kind, - spelling: spelling, - }); + self.tokens().iter().filter_map(|token| { + let kind = match token.kind { + CXToken_Punctuation => token::Kind::Punctuation, + CXToken_Literal => token::Kind::Literal, + CXToken_Identifier => token::Kind::Identifier, + CXToken_Keyword => token::Kind::Keyword, + // NB: cexpr is not too happy about comments inside + // expressions, so we strip them down here. + CXToken_Comment => return None, + _ => { + error!("Found unexpected token kind: {:?}", token); + return None; + } + }; + + Some(token::Token { + kind, + raw: token.spelling().to_vec().into_boxed_slice(), + }) + }).collect() + } +} + +/// A struct that owns the tokenizer result from a given cursor. +pub struct RawTokens<'a> { + cursor: &'a Cursor, + tu: CXTranslationUnit, + tokens: *mut CXToken, + token_count: c_uint, +} + +impl<'a> RawTokens<'a> { + fn new(cursor: &'a Cursor) -> Self { + let mut tokens = ptr::null_mut(); + let mut token_count = 0; + let range = cursor.extent(); + let tu = unsafe { + clang_Cursor_getTranslationUnit(cursor.x) + }; + unsafe { clang_tokenize(tu, range, &mut tokens, &mut token_count) }; + Self { cursor, tu, tokens, token_count } + } + + fn as_slice(&self) -> &[CXToken] { + if self.tokens.is_null() { + return &[]; + } + unsafe { slice::from_raw_parts(self.tokens, self.token_count as usize) } + } + + /// Get an iterator over these tokens. + pub fn iter(&self) -> ClangTokenIterator { + ClangTokenIterator { + tu: self.tu, + raw: self.as_slice().iter(), + } + } +} + +impl<'a> Drop for RawTokens<'a> { + fn drop(&mut self) { + if !self.tokens.is_null() { + unsafe { + clang_disposeTokens(self.tu, self.tokens, self.token_count as c_uint); } - clang_disposeTokens(tu, token_ptr, num_tokens); } - Some(tokens) } +} - /// Gets the tokens that correspond to that cursor as `cexpr` tokens. - pub fn cexpr_tokens(self) -> Option<Vec<cexpr::token::Token>> { - use cexpr::token; +/// A raw clang token, that exposes only the kind and spelling. This is a +/// slightly more convenient version of `CXToken` which owns the spelling +/// string. +#[derive(Debug)] +pub struct ClangToken { + spelling: CXString, + /// The kind of token, this is the same as the relevant member from + /// `CXToken`. + pub kind: CXTokenKind, +} - self.tokens().map(|tokens| { - tokens - .into_iter() - .filter_map(|token| { - let kind = match token.kind { - CXToken_Punctuation => token::Kind::Punctuation, - CXToken_Literal => token::Kind::Literal, - CXToken_Identifier => token::Kind::Identifier, - CXToken_Keyword => token::Kind::Keyword, - // NB: cexpr is not too happy about comments inside - // expressions, so we strip them down here. - CXToken_Comment => return None, - _ => { - error!("Found unexpected token kind: {:?}", token); - return None; - } - }; - - Some(token::Token { - kind: kind, - raw: token.spelling.into_bytes().into_boxed_slice(), - }) - }) - .collect::<Vec<_>>() - }) +impl ClangToken { + /// Get the token spelling, without being converted to utf-8. + pub fn spelling(&self) -> &[u8] { + let c_str = unsafe { + CStr::from_ptr(clang_getCString(self.spelling) as *const _) + }; + c_str.to_bytes() + } +} + +impl Drop for ClangToken { + fn drop(&mut self) { + unsafe { clang_disposeString(self.spelling) } + } +} + +/// An iterator over a set of Tokens. +pub struct ClangTokenIterator<'a> { + tu: CXTranslationUnit, + raw: slice::Iter<'a, CXToken>, +} + +impl<'a> Iterator for ClangTokenIterator<'a> { + type Item = ClangToken; + + fn next(&mut self) -> Option<Self::Item> { + let raw = self.raw.next()?; + unsafe { + let kind = clang_getTokenKind(*raw); + let spelling = clang_getTokenSpelling(self.tu, *raw); + Some(ClangToken { kind, spelling }) + } } } diff --git a/src/ir/context.rs b/src/ir/context.rs index 063c79f0..f8b4f54a 100644 --- a/src/ir/context.rs +++ b/src/ir/context.rs @@ -2163,21 +2163,17 @@ If you encounter an error missing from this list, please file an issue or a PR!" let mut module_name = None; let spelling = cursor.spelling(); - if !spelling.is_empty() - { + if !spelling.is_empty() { module_name = Some(spelling) } - let tokens = match cursor.tokens() { - Some(tokens) => tokens, - None => return (module_name, ModuleKind::Normal), - }; + let tokens = cursor.tokens(); let mut iter = tokens.iter(); let mut kind = ModuleKind::Normal; let mut found_namespace_keyword = false; while let Some(token) = iter.next() { - match &*token.spelling { - "inline" => { + match token.spelling() { + b"inline" => { assert!(!found_namespace_keyword); assert!(kind != ModuleKind::Inline); kind = ModuleKind::Inline; @@ -2192,16 +2188,16 @@ If you encounter an error missing from this list, please file an issue or a PR!" // // Fortunately enough, inline nested namespace specifiers aren't // a thing, and are invalid C++ :) - "namespace" | "::" => { + b"namespace" | b"::" => { found_namespace_keyword = true; } - "{" => { + b"{" => { assert!(found_namespace_keyword); break; } name if found_namespace_keyword => { if module_name.is_none() { - module_name = Some(name.to_owned()); + module_name = Some(String::from_utf8_lossy(name).into_owned()); } break; } diff --git a/src/ir/var.rs b/src/ir/var.rs index 14f133fd..2180a1b8 100644 --- a/src/ir/var.rs +++ b/src/ir/var.rs @@ -309,7 +309,7 @@ fn parse_macro( ) -> Option<(Vec<u8>, cexpr::expr::EvalResult)> { use cexpr::expr; - let mut cexpr_tokens = cursor.cexpr_tokens()?; + let mut cexpr_tokens = cursor.cexpr_tokens(); let parser = expr::IdentifierParser::new(ctx.parsed_macros()); @@ -338,7 +338,7 @@ fn parse_int_literal_tokens(cursor: &clang::Cursor) -> Option<i64> { use cexpr::expr; use cexpr::expr::EvalResult; - let cexpr_tokens = cursor.cexpr_tokens()?; + let cexpr_tokens = cursor.cexpr_tokens(); // TODO(emilio): We can try to parse other kinds of literals. match expr::expr(&cexpr_tokens) { |