Skip to content

Commit

Permalink
New C++ function name parsing logic (Resubmit)
Browse files Browse the repository at this point in the history
Current implementation of CPlusPlusLanguage::MethodName::Parse() doesn't
get anywhere close to covering full extent of possible function declarations.
It causes incorrect behavior in avoid-stepping and sometimes messes
printing of thread backtrace.

This change implements more methodical parsing logic based on clang
lexer and simple recursive parser.

Examples:
void std::vector<Class, std::allocator<Class>>::_M_emplace_back_aux<Class const&>(Class const&)
void (*&std::_Any_data::_M_access<void (*)()>())()

Previous version of this change (D31451) was rolled back due to an issue
with Objective-C selectors being incorrectly recognized as a C++ identifier.

Differential Revision: https://reviews.llvm.org/D31451

llvm-svn: 299721
  • Loading branch information
Djuffin committed Apr 6, 2017
1 parent 6129887 commit a633ee6
Show file tree
Hide file tree
Showing 7 changed files with 1,010 additions and 129 deletions.
6 changes: 6 additions & 0 deletions lldb/lldb.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,7 @@
49DCF702170E70120092F75E /* Materializer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 49DCF700170E70120092F75E /* Materializer.cpp */; };
49DEF1251CD7C6DF006A7C7D /* BlockPointer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 49DEF11F1CD7BD90006A7C7D /* BlockPointer.cpp */; };
49E4F66B1C9CAD16008487EA /* DiagnosticManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 49E4F6681C9CAD12008487EA /* DiagnosticManager.cpp */; };
49F811F31E931B2100F4E163 /* CPlusPlusNameParser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 49F811EF1E931B1500F4E163 /* CPlusPlusNameParser.cpp */; };
4C0083401B9F9BA900D5CF24 /* UtilityFunction.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4C00833F1B9F9BA900D5CF24 /* UtilityFunction.cpp */; };
4C2479BD1BA39295009C9A7B /* FunctionCaller.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4C0083321B9A5DE200D5CF24 /* FunctionCaller.cpp */; };
4C3ADCD61810D88B00357218 /* BreakpointResolverFileRegex.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4CAA56141422D986001FFA01 /* BreakpointResolverFileRegex.cpp */; };
Expand Down Expand Up @@ -2474,6 +2475,8 @@
49EC3E9C118F90D400B1265E /* ThreadPlanCallFunction.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ThreadPlanCallFunction.h; path = include/lldb/Target/ThreadPlanCallFunction.h; sourceTree = "<group>"; };
49F1A74511B3388F003ED505 /* ClangExpressionDeclMap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ClangExpressionDeclMap.cpp; path = ExpressionParser/Clang/ClangExpressionDeclMap.cpp; sourceTree = "<group>"; };
49F1A74911B338AE003ED505 /* ClangExpressionDeclMap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ClangExpressionDeclMap.h; path = ExpressionParser/Clang/ClangExpressionDeclMap.h; sourceTree = "<group>"; };
49F811EF1E931B1500F4E163 /* CPlusPlusNameParser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = CPlusPlusNameParser.cpp; path = Language/CPlusPlus/CPlusPlusNameParser.cpp; sourceTree = "<group>"; };
49F811F01E931B1500F4E163 /* CPlusPlusNameParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CPlusPlusNameParser.h; path = Language/CPlusPlus/CPlusPlusNameParser.h; sourceTree = "<group>"; };
4C00832C1B9A58A700D5CF24 /* Expression.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Expression.h; path = include/lldb/Expression/Expression.h; sourceTree = "<group>"; };
4C00832D1B9A58A700D5CF24 /* FunctionCaller.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = FunctionCaller.h; path = include/lldb/Expression/FunctionCaller.h; sourceTree = "<group>"; };
4C00832E1B9A58A700D5CF24 /* UserExpression.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = UserExpression.h; path = include/lldb/Expression/UserExpression.h; sourceTree = "<group>"; };
Expand Down Expand Up @@ -6108,6 +6111,8 @@
945261B01B9A11BE00BF138D /* Formatters */,
94B6385C1B8FB174004FE1E4 /* CPlusPlusLanguage.h */,
94B6385B1B8FB174004FE1E4 /* CPlusPlusLanguage.cpp */,
49F811F01E931B1500F4E163 /* CPlusPlusNameParser.h */,
49F811EF1E931B1500F4E163 /* CPlusPlusNameParser.cpp */,
);
name = CPlusPlus;
sourceTree = "<group>";
Expand Down Expand Up @@ -7077,6 +7082,7 @@
2689FFF713353DB600698AC0 /* BreakpointLocation.cpp in Sources */,
2654A68D1E552D1500DA1013 /* PseudoTerminal.cpp in Sources */,
2689FFF913353DB600698AC0 /* BreakpointLocationCollection.cpp in Sources */,
49F811F31E931B2100F4E163 /* CPlusPlusNameParser.cpp in Sources */,
2689FFFB13353DB600698AC0 /* BreakpointLocationList.cpp in Sources */,
2689FFFD13353DB600698AC0 /* BreakpointOptions.cpp in Sources */,
2689FFFF13353DB600698AC0 /* BreakpointResolver.cpp in Sources */,
Expand Down
1 change: 1 addition & 0 deletions lldb/source/Plugins/Language/CPlusPlus/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
add_lldb_library(lldbPluginCPlusPlusLanguage PLUGIN
BlockPointer.cpp
CPlusPlusLanguage.cpp
CPlusPlusNameParser.cpp
CxxStringTypes.cpp
LibCxx.cpp
LibCxxAtomic.cpp
Expand Down
171 changes: 67 additions & 104 deletions lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

// Other libraries and framework includes
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Threading.h"

// Project includes
#include "lldb/Core/PluginManager.h"
Expand All @@ -36,6 +35,7 @@
#include "lldb/Utility/RegularExpression.h"

#include "BlockPointer.h"
#include "CPlusPlusNameParser.h"
#include "CxxStringTypes.h"
#include "LibCxx.h"
#include "LibCxxAtomic.h"
Expand Down Expand Up @@ -85,15 +85,14 @@ void CPlusPlusLanguage::MethodName::Clear() {
m_context = llvm::StringRef();
m_arguments = llvm::StringRef();
m_qualifiers = llvm::StringRef();
m_type = eTypeInvalid;
m_parsed = false;
m_parse_error = false;
}

bool ReverseFindMatchingChars(const llvm::StringRef &s,
const llvm::StringRef &left_right_chars,
size_t &left_pos, size_t &right_pos,
size_t pos = llvm::StringRef::npos) {
static bool ReverseFindMatchingChars(const llvm::StringRef &s,
const llvm::StringRef &left_right_chars,
size_t &left_pos, size_t &right_pos,
size_t pos = llvm::StringRef::npos) {
assert(left_right_chars.size() == 2);
left_pos = llvm::StringRef::npos;
const char left_char = left_right_chars[0];
Expand All @@ -119,10 +118,9 @@ bool ReverseFindMatchingChars(const llvm::StringRef &s,
return false;
}

static bool IsValidBasename(const llvm::StringRef &basename) {
// Check that the basename matches with the following regular expression or is
// an operator name:
// "^~?([A-Za-z_][A-Za-z_0-9]*)(<.*>)?$"
static bool IsTrivialBasename(const llvm::StringRef &basename) {
// Check that the basename matches with the following regular expression
// "^~?([A-Za-z_][A-Za-z_0-9]*)$"
// We are using a hand written implementation because it is significantly more
// efficient then
// using the general purpose regular expression library.
Expand All @@ -149,100 +147,69 @@ static bool IsValidBasename(const llvm::StringRef &basename) {
if (idx == basename.size())
return true;

// Check for basename with template arguments
// TODO: Improve the quality of the validation with validating the template
// arguments
if (basename[idx] == '<' && basename.back() == '>')
return true;
return false;
}

// Check if the basename is a vaild C++ operator name
if (!basename.startswith("operator"))
return false;
bool CPlusPlusLanguage::MethodName::TrySimplifiedParse() {
// This method tries to parse simple method definitions
// which are presumably most comman in user programs.
// Definitions that can be parsed by this function don't have return types
// and templates in the name.
// A::B::C::fun(std::vector<T> &) const
size_t arg_start, arg_end;
llvm::StringRef full(m_full.GetCString());
llvm::StringRef parens("()", 2);
if (ReverseFindMatchingChars(full, parens, arg_start, arg_end)) {
m_arguments = full.substr(arg_start, arg_end - arg_start + 1);
if (arg_end + 1 < full.size())
m_qualifiers = full.substr(arg_end + 1).ltrim();

if (arg_start == 0)
return false;
size_t basename_end = arg_start;
size_t context_start = 0;
size_t context_end = full.rfind(':', basename_end);
if (context_end == llvm::StringRef::npos)
m_basename = full.substr(0, basename_end);
else {
if (context_start < context_end)
m_context = full.substr(context_start, context_end - 1 - context_start);
const size_t basename_begin = context_end + 1;
m_basename = full.substr(basename_begin, basename_end - basename_begin);
}

static RegularExpression g_operator_regex(
llvm::StringRef("^(operator)( "
"?)([A-Za-z_][A-Za-z_0-9]*|\\(\\)|"
"\\[\\]|[\\^<>=!\\/"
"*+-]+)(<.*>)?(\\[\\])?$"));
std::string basename_str(basename.str());
return g_operator_regex.Execute(basename_str, nullptr);
if (IsTrivialBasename(m_basename)) {
return true;
} else {
// The C++ basename doesn't match our regular expressions so this can't
// be a valid C++ method, clear everything out and indicate an error
m_context = llvm::StringRef();
m_basename = llvm::StringRef();
m_arguments = llvm::StringRef();
m_qualifiers = llvm::StringRef();
return false;
}
}
return false;
}

void CPlusPlusLanguage::MethodName::Parse() {
if (!m_parsed && m_full) {
// ConstString mangled;
// m_full.GetMangledCounterpart(mangled);
// printf ("\n parsing = '%s'\n", m_full.GetCString());
// if (mangled)
// printf (" mangled = '%s'\n", mangled.GetCString());
m_parse_error = false;
m_parsed = true;
llvm::StringRef full(m_full.GetCString());

size_t arg_start, arg_end;
llvm::StringRef parens("()", 2);
if (ReverseFindMatchingChars(full, parens, arg_start, arg_end)) {
m_arguments = full.substr(arg_start, arg_end - arg_start + 1);
if (arg_end + 1 < full.size())
m_qualifiers = full.substr(arg_end + 1);
if (arg_start > 0) {
size_t basename_end = arg_start;
size_t context_start = 0;
size_t context_end = llvm::StringRef::npos;
if (basename_end > 0 && full[basename_end - 1] == '>') {
// TODO: handle template junk...
// Templated function
size_t template_start, template_end;
llvm::StringRef lt_gt("<>", 2);
if (ReverseFindMatchingChars(full, lt_gt, template_start,
template_end, basename_end)) {
// Check for templated functions that include return type like:
// 'void foo<Int>()'
context_start = full.rfind(' ', template_start);
if (context_start == llvm::StringRef::npos)
context_start = 0;
else
++context_start;

context_end = full.rfind(':', template_start);
if (context_end == llvm::StringRef::npos ||
context_end < context_start)
context_end = context_start;
} else {
context_end = full.rfind(':', basename_end);
}
} else if (context_end == llvm::StringRef::npos) {
context_end = full.rfind(':', basename_end);
}

if (context_end == llvm::StringRef::npos)
m_basename = full.substr(0, basename_end);
else {
if (context_start < context_end)
m_context =
full.substr(context_start, context_end - 1 - context_start);
const size_t basename_begin = context_end + 1;
m_basename =
full.substr(basename_begin, basename_end - basename_begin);
}
m_type = eTypeUnknownMethod;
if (TrySimplifiedParse()) {
m_parse_error = false;
} else {
CPlusPlusNameParser parser(m_full.GetStringRef());
if (auto function = parser.ParseAsFunctionDefinition()) {
m_basename = function.getValue().name.basename;
m_context = function.getValue().name.context;
m_arguments = function.getValue().arguments;
m_qualifiers = function.getValue().qualifiers;
m_parse_error = false;
} else {
m_parse_error = true;
return;
}

if (!IsValidBasename(m_basename)) {
// The C++ basename doesn't match our regular expressions so this can't
// be a valid C++ method, clear everything out and indicate an error
m_context = llvm::StringRef();
m_basename = llvm::StringRef();
m_arguments = llvm::StringRef();
m_qualifiers = llvm::StringRef();
m_parse_error = true;
}
} else {
m_parse_error = true;
}
m_parsed = true;
}
}

Expand Down Expand Up @@ -273,14 +240,13 @@ llvm::StringRef CPlusPlusLanguage::MethodName::GetQualifiers() {
std::string CPlusPlusLanguage::MethodName::GetScopeQualifiedName() {
if (!m_parsed)
Parse();
if (m_basename.empty() || m_context.empty())
return std::string();
if (m_context.empty())
return m_basename;

std::string res;
res += m_context;
res += "::";
res += m_basename;

return res;
}

Expand All @@ -296,13 +262,10 @@ bool CPlusPlusLanguage::IsCPPMangledName(const char *name) {

bool CPlusPlusLanguage::ExtractContextAndIdentifier(
const char *name, llvm::StringRef &context, llvm::StringRef &identifier) {
static RegularExpression g_basename_regex(llvm::StringRef(
"^(([A-Za-z_][A-Za-z_0-9]*::)*)(~?[A-Za-z_~][A-Za-z_0-9]*)$"));
RegularExpression::Match match(4);
if (g_basename_regex.Execute(llvm::StringRef::withNullAsEmpty(name),
&match)) {
match.GetMatchAtIndex(name, 1, context);
match.GetMatchAtIndex(name, 3, identifier);
CPlusPlusNameParser parser(name);
if (auto full_name = parser.ParseAsFullName()) {
identifier = full_name.getValue().basename;
context = full_name.getValue().context;
return true;
}
return false;
Expand Down
19 changes: 4 additions & 15 deletions lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,13 @@ class CPlusPlusLanguage : public Language {
public:
class MethodName {
public:
enum Type {
eTypeInvalid,
eTypeUnknownMethod,
eTypeClassMethod,
eTypeInstanceMethod
};

MethodName()
: m_full(), m_basename(), m_context(), m_arguments(), m_qualifiers(),
m_type(eTypeInvalid), m_parsed(false), m_parse_error(false) {}
m_parsed(false), m_parse_error(false) {}

MethodName(const ConstString &s)
: m_full(s), m_basename(), m_context(), m_arguments(), m_qualifiers(),
m_type(eTypeInvalid), m_parsed(false), m_parse_error(false) {}
m_parsed(false), m_parse_error(false) {}

void Clear();

Expand All @@ -51,13 +44,9 @@ class CPlusPlusLanguage : public Language {
Parse();
if (m_parse_error)
return false;
if (m_type == eTypeInvalid)
return false;
return (bool)m_full;
}

Type GetType() const { return m_type; }

const ConstString &GetFullName() const { return m_full; }

std::string GetScopeQualifiedName();
Expand All @@ -72,6 +61,7 @@ class CPlusPlusLanguage : public Language {

protected:
void Parse();
bool TrySimplifiedParse();

ConstString m_full; // Full name:
// "lldb::SBTarget::GetBreakpointAtIndex(unsigned int)
Expand All @@ -80,7 +70,6 @@ class CPlusPlusLanguage : public Language {
llvm::StringRef m_context; // Decl context: "lldb::SBTarget"
llvm::StringRef m_arguments; // Arguments: "(unsigned int)"
llvm::StringRef m_qualifiers; // Qualifiers: "const"
Type m_type;
bool m_parsed;
bool m_parse_error;
};
Expand Down Expand Up @@ -121,7 +110,7 @@ class CPlusPlusLanguage : public Language {
// If the name is a lone C identifier (e.g. C) or a qualified C identifier
// (e.g. A::B::C) it will return true,
// and identifier will be the identifier (C and C respectively) and the
// context will be "" and "A::B::" respectively.
// context will be "" and "A::B" respectively.
// If the name fails the heuristic matching for a qualified or unqualified
// C/C++ identifier, then it will return false
// and identifier and context will be unchanged.
Expand Down
Loading

0 comments on commit a633ee6

Please sign in to comment.