diff options
Diffstat (limited to 'src/clang.rs')
-rwxr-xr-x | src/clang.rs | 69 |
1 files changed, 63 insertions, 6 deletions
diff --git a/src/clang.rs b/src/clang.rs index e6d78123..177d7dab 100755 --- a/src/clang.rs +++ b/src/clang.rs @@ -4,8 +4,9 @@ #![allow(non_upper_case_globals, dead_code)] +use cexpr; use clangll::*; -use std::{mem, ptr}; +use std::{mem, ptr, slice}; use std::ffi::{CStr, CString}; use std::fmt; use std::hash::Hash; @@ -1051,18 +1052,18 @@ impl TranslationUnit { let range = cursor.extent(); let mut tokens = vec![]; unsafe { - let mut token_ptr = ::std::ptr::null_mut(); + let mut token_ptr = ptr::null_mut(); let mut num_tokens: c_uint = 0; clang_tokenize(self.x, range, &mut token_ptr, &mut num_tokens); if token_ptr.is_null() { return None; } - let token_array = ::std::slice::from_raw_parts(token_ptr, - num_tokens as usize); + + let token_array = slice::from_raw_parts(token_ptr, + num_tokens as usize); for &token in token_array.iter() { let kind = clang_getTokenKind(token); - let spelling: String = clang_getTokenSpelling(self.x, token) - .into(); + let spelling = clang_getTokenSpelling(self.x, token).into(); tokens.push(Token { kind: kind, @@ -1073,6 +1074,62 @@ impl TranslationUnit { } Some(tokens) } + + /// Convert a set of tokens from clang into `cexpr` tokens, for further + /// processing. + pub fn cexpr_tokens(&self, + cursor: &Cursor) + -> Option<Vec<cexpr::token::Token>> { + use cexpr::token; + + let mut tokens = match self.tokens(cursor) { + Some(tokens) => tokens, + None => return None, + }; + + // FIXME(emilio): LLVM 3.9 at least always include an extra token for no + // good reason (except if we're at EOF). So we do this kind of hack, + // where we skip known-to-cause problems trailing punctuation and + // trailing keywords. + // + // This is sort of unfortunate, though :(. + // + // I'll try to get it fixed in LLVM if I have the time to submit a + // patch. + let mut trim_last_token = false; + if let Some(token) = tokens.last() { + // The starting of the next macro. + trim_last_token |= token.spelling == "#" && + token.kind == CXToken_Punctuation; + + // A following keyword of any kind, like a following declaration. + trim_last_token |= token.kind == CXToken_Keyword; + } + + if trim_last_token { + tokens.pop().unwrap(); + } + + Some(tokens.into_iter() + .filter_map(|token| { + let kind = match token.kind { + CXToken_Punctuation => token::Kind::Punctuation, + CXToken_Literal => token::Kind::Literal, + CXToken_Identifier => token::Kind::Identifier, + CXToken_Keyword => token::Kind::Keyword, + // NB: cexpr is not too happy about comments inside + // expressions, so we strip them down here. + CXToken_Comment => return None, + _ => panic!("Found unexpected token kind: {}", token.kind), + }; + + Some(token::Token { + kind: kind, + raw: token.spelling.into_bytes().into_boxed_slice(), + }) + }) + .collect::<Vec<_>>()) + } } impl Drop for TranslationUnit { |