summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/clang.rs172
-rw-r--r--src/ir/context.rs18
-rw-r--r--src/ir/var.rs4
3 files changed, 125 insertions, 69 deletions
diff --git a/src/clang.rs b/src/clang.rs
index a31cba31..8a3e3c68 100644
--- a/src/clang.rs
+++ b/src/clang.rs
@@ -507,11 +507,9 @@ impl Cursor {
let mut found_attr = false;
self.visit(|cur| {
if cur.kind() == CXCursor_UnexposedAttr {
- found_attr = cur.tokens().map(|tokens| {
- tokens.iter().any(|t| {
- t.kind == CXToken_Identifier && t.spelling == attr
- })
- }).unwrap_or(false);
+ found_attr = cur.tokens().iter().any(|t| {
+ t.kind == CXToken_Identifier && t.spelling() == attr.as_bytes()
+ });
if found_attr {
return CXChildVisit_Break;
@@ -653,64 +651,126 @@ impl Cursor {
}
/// Gets the tokens that correspond to that cursor.
- pub fn tokens(&self) -> Option<Vec<Token>> {
- let range = self.extent();
- let mut tokens = vec![];
- unsafe {
- let tu = clang_Cursor_getTranslationUnit(self.x);
- let mut token_ptr = ptr::null_mut();
- let mut num_tokens: c_uint = 0;
- clang_tokenize(tu, range, &mut token_ptr, &mut num_tokens);
- if token_ptr.is_null() {
- return None;
- }
+ pub fn tokens(&self) -> RawTokens {
+ RawTokens::new(self)
+ }
- let token_array =
- slice::from_raw_parts(token_ptr, num_tokens as usize);
- for &token in token_array.iter() {
- let kind = clang_getTokenKind(token);
- let spelling =
- cxstring_into_string(clang_getTokenSpelling(tu, token));
+ /// Gets the tokens that correspond to that cursor as `cexpr` tokens.
+ pub fn cexpr_tokens(self) -> Vec<cexpr::token::Token> {
+ use cexpr::token;
- tokens.push(Token {
- kind: kind,
- spelling: spelling,
- });
+ self.tokens().iter().filter_map(|token| {
+ let kind = match token.kind {
+ CXToken_Punctuation => token::Kind::Punctuation,
+ CXToken_Literal => token::Kind::Literal,
+ CXToken_Identifier => token::Kind::Identifier,
+ CXToken_Keyword => token::Kind::Keyword,
+ // NB: cexpr is not too happy about comments inside
+ // expressions, so we strip them down here.
+ CXToken_Comment => return None,
+ _ => {
+ error!("Found unexpected token kind: {:?}", token);
+ return None;
+ }
+ };
+
+ Some(token::Token {
+ kind,
+ raw: token.spelling().to_vec().into_boxed_slice(),
+ })
+ }).collect()
+ }
+}
+
+/// A struct that owns the tokenizer result from a given cursor.
+pub struct RawTokens<'a> {
+ cursor: &'a Cursor,
+ tu: CXTranslationUnit,
+ tokens: *mut CXToken,
+ token_count: c_uint,
+}
+
+impl<'a> RawTokens<'a> {
+ fn new(cursor: &'a Cursor) -> Self {
+ let mut tokens = ptr::null_mut();
+ let mut token_count = 0;
+ let range = cursor.extent();
+ let tu = unsafe {
+ clang_Cursor_getTranslationUnit(cursor.x)
+ };
+ unsafe { clang_tokenize(tu, range, &mut tokens, &mut token_count) };
+ Self { cursor, tu, tokens, token_count }
+ }
+
+ fn as_slice(&self) -> &[CXToken] {
+ if self.tokens.is_null() {
+ return &[];
+ }
+ unsafe { slice::from_raw_parts(self.tokens, self.token_count as usize) }
+ }
+
+ /// Get an iterator over these tokens.
+ pub fn iter(&self) -> ClangTokenIterator {
+ ClangTokenIterator {
+ tu: self.tu,
+ raw: self.as_slice().iter(),
+ }
+ }
+}
+
+impl<'a> Drop for RawTokens<'a> {
+ fn drop(&mut self) {
+ if !self.tokens.is_null() {
+ unsafe {
+ clang_disposeTokens(self.tu, self.tokens, self.token_count as c_uint);
}
- clang_disposeTokens(tu, token_ptr, num_tokens);
}
- Some(tokens)
}
+}
- /// Gets the tokens that correspond to that cursor as `cexpr` tokens.
- pub fn cexpr_tokens(self) -> Option<Vec<cexpr::token::Token>> {
- use cexpr::token;
+/// A raw clang token, that exposes only the kind and spelling. This is a
+/// slightly more convenient version of `CXToken` which owns the spelling
+/// string.
+#[derive(Debug)]
+pub struct ClangToken {
+ spelling: CXString,
+ /// The kind of token, this is the same as the relevant member from
+ /// `CXToken`.
+ pub kind: CXTokenKind,
+}
- self.tokens().map(|tokens| {
- tokens
- .into_iter()
- .filter_map(|token| {
- let kind = match token.kind {
- CXToken_Punctuation => token::Kind::Punctuation,
- CXToken_Literal => token::Kind::Literal,
- CXToken_Identifier => token::Kind::Identifier,
- CXToken_Keyword => token::Kind::Keyword,
- // NB: cexpr is not too happy about comments inside
- // expressions, so we strip them down here.
- CXToken_Comment => return None,
- _ => {
- error!("Found unexpected token kind: {:?}", token);
- return None;
- }
- };
-
- Some(token::Token {
- kind: kind,
- raw: token.spelling.into_bytes().into_boxed_slice(),
- })
- })
- .collect::<Vec<_>>()
- })
+impl ClangToken {
+ /// Get the token spelling, without being converted to utf-8.
+ pub fn spelling(&self) -> &[u8] {
+ let c_str = unsafe {
+ CStr::from_ptr(clang_getCString(self.spelling) as *const _)
+ };
+ c_str.to_bytes()
+ }
+}
+
+impl Drop for ClangToken {
+ fn drop(&mut self) {
+ unsafe { clang_disposeString(self.spelling) }
+ }
+}
+
+/// An iterator over a set of Tokens.
+pub struct ClangTokenIterator<'a> {
+ tu: CXTranslationUnit,
+ raw: slice::Iter<'a, CXToken>,
+}
+
+impl<'a> Iterator for ClangTokenIterator<'a> {
+ type Item = ClangToken;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ let raw = self.raw.next()?;
+ unsafe {
+ let kind = clang_getTokenKind(*raw);
+ let spelling = clang_getTokenSpelling(self.tu, *raw);
+ Some(ClangToken { kind, spelling })
+ }
}
}
diff --git a/src/ir/context.rs b/src/ir/context.rs
index 063c79f0..f8b4f54a 100644
--- a/src/ir/context.rs
+++ b/src/ir/context.rs
@@ -2163,21 +2163,17 @@ If you encounter an error missing from this list, please file an issue or a PR!"
let mut module_name = None;
let spelling = cursor.spelling();
- if !spelling.is_empty()
- {
+ if !spelling.is_empty() {
module_name = Some(spelling)
}
- let tokens = match cursor.tokens() {
- Some(tokens) => tokens,
- None => return (module_name, ModuleKind::Normal),
- };
+ let tokens = cursor.tokens();
let mut iter = tokens.iter();
let mut kind = ModuleKind::Normal;
let mut found_namespace_keyword = false;
while let Some(token) = iter.next() {
- match &*token.spelling {
- "inline" => {
+ match token.spelling() {
+ b"inline" => {
assert!(!found_namespace_keyword);
assert!(kind != ModuleKind::Inline);
kind = ModuleKind::Inline;
@@ -2192,16 +2188,16 @@ If you encounter an error missing from this list, please file an issue or a PR!"
//
// Fortunately enough, inline nested namespace specifiers aren't
// a thing, and are invalid C++ :)
- "namespace" | "::" => {
+ b"namespace" | b"::" => {
found_namespace_keyword = true;
}
- "{" => {
+ b"{" => {
assert!(found_namespace_keyword);
break;
}
name if found_namespace_keyword => {
if module_name.is_none() {
- module_name = Some(name.to_owned());
+ module_name = Some(String::from_utf8_lossy(name).into_owned());
}
break;
}
diff --git a/src/ir/var.rs b/src/ir/var.rs
index 14f133fd..2180a1b8 100644
--- a/src/ir/var.rs
+++ b/src/ir/var.rs
@@ -309,7 +309,7 @@ fn parse_macro(
) -> Option<(Vec<u8>, cexpr::expr::EvalResult)> {
use cexpr::expr;
- let mut cexpr_tokens = cursor.cexpr_tokens()?;
+ let mut cexpr_tokens = cursor.cexpr_tokens();
let parser = expr::IdentifierParser::new(ctx.parsed_macros());
@@ -338,7 +338,7 @@ fn parse_int_literal_tokens(cursor: &clang::Cursor) -> Option<i64> {
use cexpr::expr;
use cexpr::expr::EvalResult;
- let cexpr_tokens = cursor.cexpr_tokens()?;
+ let cexpr_tokens = cursor.cexpr_tokens();
// TODO(emilio): We can try to parse other kinds of literals.
match expr::expr(&cexpr_tokens) {