1 : // Copyright (c) 2010 Google Inc.
2 : // All rights reserved.
3 : //
4 : // Redistribution and use in source and binary forms, with or without
5 : // modification, are permitted provided that the following conditions are
6 : // met:
7 : //
8 : // * Redistributions of source code must retain the above copyright
9 : // notice, this list of conditions and the following disclaimer.
10 : // * Redistributions in binary form must reproduce the above
11 : // copyright notice, this list of conditions and the following disclaimer
12 : // in the documentation and/or other materials provided with the
13 : // distribution.
14 : // * Neither the name of Google Inc. nor the names of its
15 : // contributors may be used to endorse or promote products derived from
16 : // this software without specific prior written permission.
17 : //
18 : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 :
30 : // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
31 :
32 : // Implement the DwarfCUToModule class; see dwarf_cu_to_module.h.
33 :
34 : #include "common/dwarf_cu_to_module.h"
35 :
36 : #include <assert.h>
37 :
38 : #include <algorithm>
39 : #include <set>
40 : #include <utility>
41 :
42 : #include "common/dwarf_line_to_module.h"
43 :
44 : namespace google_breakpad {
45 :
46 : using std::map;
47 : using std::pair;
48 : using std::set;
49 : using std::vector;
50 :
51 : // Data provided by a DWARF specification DIE.
52 : //
53 : // In DWARF, the DIE for a definition may contain a DW_AT_specification
54 : // attribute giving the offset of the corresponding declaration DIE, and
55 : // the definition DIE may omit information given in the declaration. For
56 : // example, it's common for a function's address range to appear only in
57 : // its definition DIE, but its name to appear only in its declaration
58 : // DIE.
59 : //
60 : // The dumper needs to be able to follow DW_AT_specification links to
61 : // bring all this information together in a FUNC record. Conveniently,
62 : // DIEs that are the target of such links have a DW_AT_declaration flag
63 : // set, so we can identify them when we first see them, and record their
64 : // contents for later reference.
65 : //
66 : // A Specification holds information gathered from a declaration DIE that
67 : // we may need if we find a DW_AT_specification link pointing to it.
68 53184096 : struct DwarfCUToModule::Specification {
69 : // The name of the enclosing scope, or the empty string if there is none.
70 : string enclosing_name;
71 :
72 : // The name for the specification DIE itself, without any enclosing
73 : // name components.
74 : string unqualified_name;
75 : };
76 :
// An abstract origin: the base definition of an inline function, which
// other DIEs may cite through DW_AT_abstract_origin.
struct AbstractOrigin {
  // Create an origin with an empty name.
  AbstractOrigin() {}

  // Create an origin whose name is ORIGIN_NAME.
  AbstractOrigin(const string& origin_name) : name(origin_name) {}

  // The name recorded for the function.
  string name;
};
84 :
85 : typedef map<uint64, AbstractOrigin> AbstractOriginByOffset;
86 :
87 : // Data global to the DWARF-bearing file that is private to the
88 : // DWARF-to-Module process.
89 248 : struct DwarfCUToModule::FilePrivate {
90 : // A set of strings used in this CU. Before storing a string in one of
91 : // our data structures, insert it into this set, and then use the string
92 : // from the set.
93 : //
94 : // Because std::string uses reference counting internally, simply using
95 : // strings from this set, even if passed by value, assigned, or held
96 : // directly in structures and containers (map<string, ...>, for example),
97 : // causes those strings to share a single instance of each distinct piece
98 : // of text.
99 : set<string> common_strings;
100 :
101 : // A map from offsets of DIEs within the .debug_info section to
102 : // Specifications describing those DIEs. Specification references can
103 : // cross compilation unit boundaries.
104 : SpecificationByOffset specifications;
105 :
106 : AbstractOriginByOffset origins;
107 : };
108 :
109 124 : DwarfCUToModule::FileContext::FileContext(const string &filename_arg,
110 : Module *module_arg)
111 124 : : filename(filename_arg), module(module_arg) {
112 124 : file_private = new FilePrivate();
113 124 : }
114 :
115 248 : DwarfCUToModule::FileContext::~FileContext() {
116 124 : delete file_private;
117 124 : }
118 :
119 : // Information global to the particular compilation unit we're
120 : // parsing. This is for data shared across the CU's entire DIE tree,
121 : // and parameters from the code invoking the CU parser.
122 : struct DwarfCUToModule::CUContext {
123 4773 : CUContext(FileContext *file_context_arg, WarningReporter *reporter_arg)
124 : : file_context(file_context_arg),
125 : reporter(reporter_arg),
126 4773 : language(Language::CPlusPlus) { }
127 9546 : ~CUContext() {
128 9546 : for (vector<Module::Function *>::iterator it = functions.begin();
129 4773 : it != functions.end(); it++)
130 0 : delete *it;
131 4773 : };
132 :
133 : // The DWARF-bearing file into which this CU was incorporated.
134 : FileContext *file_context;
135 :
136 : // For printing error messages.
137 : WarningReporter *reporter;
138 :
139 : // The source language of this compilation unit.
140 : const Language *language;
141 :
142 : // The functions defined in this compilation unit. We accumulate
143 : // them here during parsing. Then, in DwarfCUToModule::Finish, we
144 : // assign them lines and add them to file_context->module.
145 : //
146 : // Destroying this destroys all the functions this vector points to.
147 : vector<Module::Function *> functions;
148 : };
149 :
150 : // Information about the context of a particular DIE. This is for
151 : // information that changes as we descend the tree towards the leaves:
152 : // the containing classes/namespaces, etc.
153 2364836 : struct DwarfCUToModule::DIEContext {
154 : // The fully-qualified name of the context. For example, for a
155 : // tree like:
156 : //
157 : // DW_TAG_namespace Foo
158 : // DW_TAG_class Bar
159 : // DW_TAG_subprogram Baz
160 : //
161 : // in a C++ compilation unit, the DIEContext's name for the
162 : // DW_TAG_subprogram DIE would be "Foo::Bar". The DIEContext's
163 : // name for the DW_TAG_namespace DIE would be "".
164 : string name;
165 : };
166 :
167 : // An abstract base class for all the dumper's DIE handlers.
168 7390771 : class DwarfCUToModule::GenericDIEHandler: public dwarf2reader::DIEHandler {
169 : public:
170 : // Create a handler for the DIE at OFFSET whose compilation unit is
171 : // described by CU_CONTEXT, and whose immediate context is described
172 : // by PARENT_CONTEXT.
173 7390771 : GenericDIEHandler(CUContext *cu_context, DIEContext *parent_context,
174 : uint64 offset)
175 : : cu_context_(cu_context),
176 : parent_context_(parent_context),
177 : offset_(offset),
178 : declaration_(false),
179 7390771 : specification_(NULL) { }
180 :
181 : // Derived classes' ProcessAttributeUnsigned can defer to this to
182 : // handle DW_AT_declaration, or simply not override it.
183 : void ProcessAttributeUnsigned(enum DwarfAttribute attr,
184 : enum DwarfForm form,
185 : uint64 data);
186 :
187 : // Derived classes' ProcessAttributeReference can defer to this to
188 : // handle DW_AT_specification, or simply not override it.
189 : void ProcessAttributeReference(enum DwarfAttribute attr,
190 : enum DwarfForm form,
191 : uint64 data);
192 :
193 : // Derived classes' ProcessAttributeReference can defer to this to
194 : // handle DW_AT_specification, or simply not override it.
195 : void ProcessAttributeString(enum DwarfAttribute attr,
196 : enum DwarfForm form,
197 : const string &data);
198 :
199 : protected:
200 : // Compute and return the fully-qualified name of the DIE. If this
201 : // DIE is a declaration DIE, to be cited by other DIEs'
202 : // DW_AT_specification attributes, record its enclosing name and
203 : // unqualified name in the specification table.
204 : //
205 : // Use this from EndAttributes member functions, not ProcessAttribute*
206 : // functions; only the former can be sure that all the DIE's attributes
207 : // have been seen.
208 : string ComputeQualifiedName();
209 :
210 : CUContext *cu_context_;
211 : DIEContext *parent_context_;
212 : uint64 offset_;
213 :
214 : // If this DIE has a DW_AT_declaration attribute, this is its value.
215 : // It is false on DIEs with no DW_AT_declaration attribute.
216 : bool declaration_;
217 :
218 : // If this DIE has a DW_AT_specification attribute, this is the
219 : // Specification structure for the DIE the attribute refers to.
220 : // Otherwise, this is NULL.
221 : Specification *specification_;
222 :
223 : // The value of the DW_AT_name attribute, or the empty string if the
224 : // DIE has no such attribute.
225 : string name_attribute_;
226 : };
227 :
228 26386874 : void DwarfCUToModule::GenericDIEHandler::ProcessAttributeUnsigned(
229 : enum DwarfAttribute attr,
230 : enum DwarfForm form,
231 : uint64 data) {
232 26386874 : switch (attr) {
233 5909344 : case dwarf2reader::DW_AT_declaration: declaration_ = (data != 0); break;
234 20477530 : default: break;
235 : }
236 26386874 : }
237 :
238 10836170 : void DwarfCUToModule::GenericDIEHandler::ProcessAttributeReference(
239 : enum DwarfAttribute attr,
240 : enum DwarfForm form,
241 : uint64 data) {
242 10836170 : switch (attr) {
243 : case dwarf2reader::DW_AT_specification: {
244 : // Find the Specification to which this attribute refers, and
245 : // set specification_ appropriately. We could do more processing
246 : // here, but it's better to leave the real work to our
247 : // EndAttribute member function, at which point we know we have
248 : // seen all the DIE's attributes.
249 686217 : FileContext *file_context = cu_context_->file_context;
250 : SpecificationByOffset *specifications
251 686217 : = &file_context->file_private->specifications;
252 686217 : SpecificationByOffset::iterator spec = specifications->find(data);
253 686217 : if (spec != specifications->end()) {
254 686217 : specification_ = &spec->second;
255 : } else {
256 : // Technically, there's no reason a DW_AT_specification
257 : // couldn't be a forward reference, but supporting that would
258 : // be a lot of work (changing to a two-pass structure), and I
259 : // don't think any producers we care about ever emit such
260 : // things.
261 0 : cu_context_->reporter->UnknownSpecification(offset_, data);
262 : }
263 686217 : break;
264 : }
265 10149953 : default: break;
266 : }
267 10836170 : }
268 :
269 11090859 : void DwarfCUToModule::GenericDIEHandler::ProcessAttributeString(
270 : enum DwarfAttribute attr,
271 : enum DwarfForm form,
272 : const string &data) {
273 11090859 : switch (attr) {
274 : case dwarf2reader::DW_AT_name: {
275 : // Place the name in our global set of strings, and then use the
276 : // string from the set. Even though the assignment looks like a copy,
277 : // all the major std::string implementations use reference counting
278 : // internally, so the effect is to have all our data structures share
279 : // copies of strings whenever possible.
280 : pair<set<string>::iterator, bool> result =
281 6533745 : cu_context_->file_context->file_private->common_strings.insert(data);
282 6533745 : name_attribute_ = *result.first;
283 6533745 : break;
284 : }
285 4557114 : default: break;
286 : }
287 11090859 : }
288 :
289 7390771 : string DwarfCUToModule::GenericDIEHandler::ComputeQualifiedName() {
290 : // Find our unqualified name. If the DIE has its own DW_AT_name
291 : // attribute, then use that; otherwise, check our specification.
292 : const string *unqualified_name;
293 7390771 : if (name_attribute_.empty() && specification_)
294 686216 : unqualified_name = &specification_->unqualified_name;
295 : else
296 6704555 : unqualified_name = &name_attribute_;
297 :
298 : // Find the name of our enclosing context. If we have a
299 : // specification, it's the specification's enclosing context that
300 : // counts; otherwise, use this DIE's context.
301 : const string *enclosing_name;
302 7390771 : if (specification_)
303 686217 : enclosing_name = &specification_->enclosing_name;
304 : else
305 6704554 : enclosing_name = &parent_context_->name;
306 :
307 : // If this DIE was marked as a declaration, record its names in the
308 : // specification table.
309 7390771 : if (declaration_) {
310 5909344 : FileContext *file_context = cu_context_->file_context;
311 11818688 : Specification spec;
312 5909344 : spec.enclosing_name = *enclosing_name;
313 5909344 : spec.unqualified_name = *unqualified_name;
314 5909344 : file_context->file_private->specifications[offset_] = spec;
315 : }
316 :
317 : // Combine the enclosing name and unqualified name to produce our
318 : // own fully-qualified name.
319 : return cu_context_->language->MakeQualifiedName(*enclosing_name,
320 7390771 : *unqualified_name);
321 : }
322 :
323 : // A handler class for DW_TAG_subprogram DIEs.
324 12426252 : class DwarfCUToModule::FuncHandler: public GenericDIEHandler {
325 : public:
326 6213126 : FuncHandler(CUContext *cu_context, DIEContext *parent_context,
327 : uint64 offset)
328 : : GenericDIEHandler(cu_context, parent_context, offset),
329 6213126 : low_pc_(0), high_pc_(0), abstract_origin_(NULL), inline_(false) { }
330 : void ProcessAttributeUnsigned(enum DwarfAttribute attr,
331 : enum DwarfForm form,
332 : uint64 data);
333 : void ProcessAttributeSigned(enum DwarfAttribute attr,
334 : enum DwarfForm form,
335 : int64 data);
336 : void ProcessAttributeReference(enum DwarfAttribute attr,
337 : enum DwarfForm form,
338 : uint64 data);
339 :
340 : bool EndAttributes();
341 : void Finish();
342 :
343 : private:
344 : // The fully-qualified name, as derived from name_attribute_,
345 : // specification_, parent_context_. Computed in EndAttributes.
346 : string name_;
347 : uint64 low_pc_, high_pc_; // DW_AT_low_pc, DW_AT_high_pc
348 : const AbstractOrigin* abstract_origin_;
349 : bool inline_;
350 : };
351 :
352 25167872 : void DwarfCUToModule::FuncHandler::ProcessAttributeUnsigned(
353 : enum DwarfAttribute attr,
354 : enum DwarfForm form,
355 : uint64 data) {
356 25167872 : switch (attr) {
357 : // If this attribute is present at all --- even if its value is
358 : // DW_INL_not_inlined --- then GCC may cite it as someone else's
359 : // DW_AT_abstract_origin attribute.
360 136920 : case dwarf2reader::DW_AT_inline: inline_ = true; break;
361 :
362 617615 : case dwarf2reader::DW_AT_low_pc: low_pc_ = data; break;
363 617615 : case dwarf2reader::DW_AT_high_pc: high_pc_ = data; break;
364 : default:
365 23795722 : GenericDIEHandler::ProcessAttributeUnsigned(attr, form, data);
366 23795722 : break;
367 : }
368 25167872 : }
369 :
370 0 : void DwarfCUToModule::FuncHandler::ProcessAttributeSigned(
371 : enum DwarfAttribute attr,
372 : enum DwarfForm form,
373 : int64 data) {
374 0 : switch (attr) {
375 : // If this attribute is present at all --- even if its value is
376 : // DW_INL_not_inlined --- then GCC may cite it as someone else's
377 : // DW_AT_abstract_origin attribute.
378 0 : case dwarf2reader::DW_AT_inline: inline_ = true; break;
379 :
380 : default:
381 0 : break;
382 : }
383 0 : }
384 :
385 9993658 : void DwarfCUToModule::FuncHandler::ProcessAttributeReference(
386 : enum DwarfAttribute attr,
387 : enum DwarfForm form,
388 : uint64 data) {
389 9993658 : switch(attr) {
390 : case dwarf2reader::DW_AT_abstract_origin: {
391 : const AbstractOriginByOffset& origins =
392 138296 : cu_context_->file_context->file_private->origins;
393 138296 : AbstractOriginByOffset::const_iterator origin = origins.find(data);
394 138296 : if (origin != origins.end()) {
395 138296 : abstract_origin_ = &(origin->second);
396 : } else {
397 0 : cu_context_->reporter->UnknownAbstractOrigin(offset_, data);
398 : }
399 138296 : break;
400 : }
401 : default:
402 9855362 : GenericDIEHandler::ProcessAttributeReference(attr, form, data);
403 9855362 : break;
404 : }
405 9993658 : }
406 :
407 6213126 : bool DwarfCUToModule::FuncHandler::EndAttributes() {
408 : // Compute our name, and record a specification, if appropriate.
409 6213126 : name_ = ComputeQualifiedName();
410 6213126 : if (name_.empty() && abstract_origin_) {
411 138296 : name_ = abstract_origin_->name;
412 : }
413 6213126 : return true;
414 : }
415 :
416 6213126 : void DwarfCUToModule::FuncHandler::Finish() {
417 : // Did we collect the information we need? Not all DWARF function
418 : // entries have low and high addresses (for example, inlined
419 : // functions that were never used), but all the ones we're
420 : // interested in cover a non-empty range of bytes.
421 6213126 : if (low_pc_ < high_pc_) {
422 : // Create a Module::Function based on the data we've gathered, and
423 : // add it to the functions_ list.
424 617615 : Module::Function *func = new Module::Function;
425 617615 : func->name = name_;
426 617615 : func->address = low_pc_;
427 617615 : func->size = high_pc_ - low_pc_;
428 617615 : func->parameter_size = 0;
429 617615 : cu_context_->functions.push_back(func);
430 5595511 : } else if (inline_) {
431 273840 : AbstractOrigin origin(name_);
432 136920 : cu_context_->file_context->file_private->origins[offset_] = origin;
433 : }
434 6213126 : }
435 :
436 : // A handler for DIEs that contain functions and contribute a
437 : // component to their names: namespaces, classes, etc.
438 2355290 : class DwarfCUToModule::NamedScopeHandler: public GenericDIEHandler {
439 : public:
440 1177645 : NamedScopeHandler(CUContext *cu_context, DIEContext *parent_context,
441 : uint64 offset)
442 1177645 : : GenericDIEHandler(cu_context, parent_context, offset) { }
443 : bool EndAttributes();
444 : DIEHandler *FindChildHandler(uint64 offset, enum DwarfTag tag,
445 : const AttributeList &attrs);
446 :
447 : private:
448 : DIEContext child_context_; // A context for our children.
449 : };
450 :
451 1177645 : bool DwarfCUToModule::NamedScopeHandler::EndAttributes() {
452 1177645 : child_context_.name = ComputeQualifiedName();
453 1177645 : return true;
454 : }
455 :
456 8836815 : dwarf2reader::DIEHandler *DwarfCUToModule::NamedScopeHandler::FindChildHandler(
457 : uint64 offset,
458 : enum DwarfTag tag,
459 : const AttributeList &attrs) {
460 8836815 : switch (tag) {
461 : case dwarf2reader::DW_TAG_subprogram:
462 5395281 : return new FuncHandler(cu_context_, &child_context_, offset);
463 : case dwarf2reader::DW_TAG_namespace:
464 : case dwarf2reader::DW_TAG_class_type:
465 : case dwarf2reader::DW_TAG_structure_type:
466 : case dwarf2reader::DW_TAG_union_type:
467 518322 : return new NamedScopeHandler(cu_context_, &child_context_, offset);
468 : default:
469 2923212 : return NULL;
470 : }
471 : }
472 :
473 0 : void DwarfCUToModule::WarningReporter::CUHeading() {
474 0 : if (printed_cu_header_)
475 0 : return;
476 : fprintf(stderr, "%s: in compilation unit '%s' (offset 0x%llx):\n",
477 0 : filename_.c_str(), cu_name_.c_str(), cu_offset_);
478 0 : printed_cu_header_ = true;
479 : }
480 :
481 0 : void DwarfCUToModule::WarningReporter::UnknownSpecification(uint64 offset,
482 : uint64 target) {
483 0 : CUHeading();
484 : fprintf(stderr, "%s: the DIE at offset 0x%llx has a DW_AT_specification"
485 : " attribute referring to the die at offset 0x%llx, which either"
486 : " was not marked as a declaration, or comes later in the file\n",
487 0 : filename_.c_str(), offset, target);
488 0 : }
489 :
490 0 : void DwarfCUToModule::WarningReporter::UnknownAbstractOrigin(uint64 offset,
491 : uint64 target) {
492 0 : CUHeading();
493 : fprintf(stderr, "%s: the DIE at offset 0x%llx has a DW_AT_abstract_origin"
494 : " attribute referring to the die at offset 0x%llx, which either"
495 : " was not marked as an inline, or comes later in the file\n",
496 0 : filename_.c_str(), offset, target);
497 0 : }
498 :
499 0 : void DwarfCUToModule::WarningReporter::MissingSection(const string &name) {
500 0 : CUHeading();
501 : fprintf(stderr, "%s: warning: couldn't find DWARF '%s' section\n",
502 0 : filename_.c_str(), name.c_str());
503 0 : }
504 :
505 0 : void DwarfCUToModule::WarningReporter::BadLineInfoOffset(uint64 offset) {
506 0 : CUHeading();
507 : fprintf(stderr, "%s: warning: line number data offset beyond end"
508 : " of '.debug_line' section\n",
509 0 : filename_.c_str());
510 0 : }
511 :
512 0 : void DwarfCUToModule::WarningReporter::UncoveredHeading() {
513 0 : if (printed_unpaired_header_)
514 0 : return;
515 0 : CUHeading();
516 : fprintf(stderr, "%s: warning: skipping unpaired lines/functions:\n",
517 0 : filename_.c_str());
518 0 : printed_unpaired_header_ = true;
519 : }
520 :
521 368 : void DwarfCUToModule::WarningReporter::UncoveredFunction(
522 : const Module::Function &function) {
523 368 : if (!uncovered_warnings_enabled_)
524 368 : return;
525 0 : UncoveredHeading();
526 : fprintf(stderr, " function%s: %s\n",
527 : function.size == 0 ? " (zero-length)" : "",
528 0 : function.name.c_str());
529 : }
530 :
531 7063 : void DwarfCUToModule::WarningReporter::UncoveredLine(const Module::Line &line) {
532 7063 : if (!uncovered_warnings_enabled_)
533 7063 : return;
534 0 : UncoveredHeading();
535 : fprintf(stderr, " line%s: %s:%d at 0x%llx\n",
536 : (line.size == 0 ? " (zero-length)" : ""),
537 0 : line.file->name.c_str(), line.number, line.address);
538 : }
539 :
540 4773 : DwarfCUToModule::DwarfCUToModule(FileContext *file_context,
541 : LineToModuleFunctor *line_reader,
542 : WarningReporter *reporter)
543 4773 : : line_reader_(line_reader), has_source_line_info_(false) {
544 4773 : cu_context_ = new CUContext(file_context, reporter);
545 4773 : child_context_ = new DIEContext();
546 4773 : }
547 :
548 9546 : DwarfCUToModule::~DwarfCUToModule() {
549 4773 : delete cu_context_;
550 4773 : delete child_context_;
551 4773 : }
552 :
553 0 : void DwarfCUToModule::ProcessAttributeSigned(enum DwarfAttribute attr,
554 : enum DwarfForm form,
555 : int64 data) {
556 0 : switch (attr) {
557 : case dwarf2reader::DW_AT_language: // source language of this CU
558 0 : SetLanguage(static_cast<DwarfLanguage>(data));
559 0 : break;
560 : default:
561 0 : break;
562 : }
563 0 : }
564 :
565 23231 : void DwarfCUToModule::ProcessAttributeUnsigned(enum DwarfAttribute attr,
566 : enum DwarfForm form,
567 : uint64 data) {
568 23231 : switch (attr) {
569 : case dwarf2reader::DW_AT_stmt_list: // Line number information.
570 4773 : has_source_line_info_ = true;
571 4773 : source_line_offset_ = data;
572 4773 : break;
573 : case dwarf2reader::DW_AT_language: // source language of this CU
574 4773 : SetLanguage(static_cast<DwarfLanguage>(data));
575 4773 : break;
576 : default:
577 13685 : break;
578 : }
579 23231 : }
580 :
581 13848 : void DwarfCUToModule::ProcessAttributeString(enum DwarfAttribute attr,
582 : enum DwarfForm form,
583 : const string &data) {
584 13848 : if (attr == dwarf2reader::DW_AT_name)
585 4773 : cu_context_->reporter->SetCUName(data);
586 13848 : }
587 :
588 4773 : bool DwarfCUToModule::EndAttributes() {
589 4773 : return true;
590 : }
591 :
592 4607175 : dwarf2reader::DIEHandler *DwarfCUToModule::FindChildHandler(
593 : uint64 offset,
594 : enum DwarfTag tag,
595 : const AttributeList &attrs) {
596 4607175 : switch (tag) {
597 : case dwarf2reader::DW_TAG_subprogram:
598 817845 : return new FuncHandler(cu_context_, child_context_, offset);
599 : case dwarf2reader::DW_TAG_namespace:
600 : case dwarf2reader::DW_TAG_class_type:
601 : case dwarf2reader::DW_TAG_structure_type:
602 : case dwarf2reader::DW_TAG_union_type:
603 659323 : return new NamedScopeHandler(cu_context_, child_context_, offset);
604 : default:
605 3130007 : return NULL;
606 : }
607 : }
608 :
609 4773 : void DwarfCUToModule::SetLanguage(DwarfLanguage language) {
610 4773 : switch (language) {
611 : case dwarf2reader::DW_LANG_Java:
612 0 : cu_context_->language = Language::Java;
613 0 : break;
614 :
615 : // DWARF has no generic language code for assembly language; this is
616 : // what the GNU toolchain uses.
617 : case dwarf2reader::DW_LANG_Mips_Assembler:
618 50 : cu_context_->language = Language::Assembler;
619 50 : break;
620 :
621 : // C++ covers so many cases that it probably has some way to cope
622 : // with whatever the other languages throw at us. So make it the
623 : // default.
624 : //
625 : // Objective C and Objective C++ seem to create entries for
626 : // methods whose DW_AT_name values are already fully-qualified:
627 : // "-[Classname method:]". These appear at the top level.
628 : //
629 : // DWARF data for C should never include namespaces or functions
630 : // nested in struct types, but if it ever does, then C++'s
631 : // notation is probably not a bad choice for that.
632 : default:
633 : case dwarf2reader::DW_LANG_ObjC:
634 : case dwarf2reader::DW_LANG_ObjC_plus_plus:
635 : case dwarf2reader::DW_LANG_C:
636 : case dwarf2reader::DW_LANG_C89:
637 : case dwarf2reader::DW_LANG_C99:
638 : case dwarf2reader::DW_LANG_C_plus_plus:
639 4723 : cu_context_->language = Language::CPlusPlus;
640 4723 : break;
641 : }
642 4773 : }
643 :
644 4723 : void DwarfCUToModule::ReadSourceLines(uint64 offset) {
645 : const dwarf2reader::SectionMap §ion_map
646 4723 : = cu_context_->file_context->section_map;
647 : dwarf2reader::SectionMap::const_iterator map_entry
648 4723 : = section_map.find(".debug_line");
649 : // Mac OS X puts DWARF data in sections whose names begin with "__"
650 : // instead of ".".
651 4723 : if (map_entry == section_map.end())
652 0 : map_entry = section_map.find("__debug_line");
653 4723 : if (map_entry == section_map.end()) {
654 0 : cu_context_->reporter->MissingSection(".debug_line");
655 0 : return;
656 : }
657 4723 : const char *section_start = map_entry->second.first;
658 4723 : uint64 section_length = map_entry->second.second;
659 4723 : if (offset >= section_length) {
660 0 : cu_context_->reporter->BadLineInfoOffset(offset);
661 0 : return;
662 : }
663 : (*line_reader_)(section_start + offset, section_length - offset,
664 4723 : cu_context_->file_context->module, &lines_);
665 : }
666 :
667 : namespace {
668 : // Return true if ADDRESS falls within the range of ITEM.
669 : template <class T>
670 21033196 : inline bool within(const T &item, Module::Address address) {
671 : // Because Module::Address is unsigned, and unsigned arithmetic
672 : // wraps around, this will be false if ADDRESS falls before the
673 : // start of ITEM, or if it falls after ITEM's end.
674 21033196 : return address - item.address < item.size;
675 : }
676 : }
677 :
678 4723 : void DwarfCUToModule::AssignLinesToFunctions() {
679 4723 : vector<Module::Function *> *functions = &cu_context_->functions;
680 4723 : WarningReporter *reporter = cu_context_->reporter;
681 :
682 : // This would be simpler if we assumed that source line entries
683 : // don't cross function boundaries. However, there's no real reason
684 : // to assume that (say) a series of function definitions on the same
685 : // line wouldn't get coalesced into one line number entry. The
686 : // DWARF spec certainly makes no such promises.
687 : //
688 : // So treat the functions and lines as peers, and take the trouble
689 : // to compute their ranges' intersections precisely. In any case,
690 : // the hair here is a constant factor for performance; the
691 : // complexity from here on out is linear.
692 :
693 : // Put both our functions and lines in order by address.
694 : sort(functions->begin(), functions->end(),
695 4723 : Module::Function::CompareByAddress);
696 4723 : sort(lines_.begin(), lines_.end(), Module::Line::CompareByAddress);
697 :
698 : // The last line that we used any piece of. We use this only for
699 : // generating warnings.
700 4723 : const Module::Line *last_line_used = NULL;
701 :
702 : // The last function and line we warned about --- so we can avoid
703 : // doing so more than once.
704 4723 : const Module::Function *last_function_cited = NULL;
705 4723 : const Module::Line *last_line_cited = NULL;
706 :
707 : // Make a single pass through both vectors from lower to higher
708 : // addresses, populating each Function's lines vector with lines
709 : // from our lines_ vector that fall within the function's address
710 : // range.
711 4723 : vector<Module::Function *>::iterator func_it = functions->begin();
712 4723 : vector<Module::Line>::const_iterator line_it = lines_.begin();
713 :
714 : Module::Address current;
715 :
716 : // Pointers to the referents of func_it and line_it, or NULL if the
717 : // iterator is at the end of the sequence.
718 : Module::Function *func;
719 : const Module::Line *line;
720 :
721 : // Start current at the beginning of the first line or function,
722 : // whichever is earlier.
723 4723 : if (func_it != functions->end() && line_it != lines_.end()) {
724 4614 : func = *func_it;
725 4614 : line = &*line_it;
726 4614 : current = std::min(func->address, line->address);
727 109 : } else if (line_it != lines_.end()) {
728 0 : func = NULL;
729 0 : line = &*line_it;
730 0 : current = line->address;
731 109 : } else if (func_it != functions->end()) {
732 0 : func = *func_it;
733 0 : line = NULL;
734 0 : current = (*func_it)->address;
735 : } else {
736 109 : return;
737 : }
738 :
739 4436417 : while (func || line) {
740 : // This loop has two invariants that hold at the top.
741 : //
742 : // First, at least one of the iterators is not at the end of its
743 : // sequence, and those that are not refer to the earliest
744 : // function or line that contains or starts after CURRENT.
745 : //
746 : // Note that every byte is in one of four states: it is covered
747 : // or not covered by a function, and, independently, it is
748 : // covered or not covered by a line.
749 : //
750 : // The second invariant is that CURRENT refers to a byte whose
751 : // state is different from its predecessor, or it refers to the
752 : // first byte in the address space. In other words, CURRENT is
753 : // always the address of a transition.
754 : //
755 : // Note that, although each iteration advances CURRENT from one
756 : // transition address to the next in each iteration, it might
757 : // not advance the iterators. Suppose we have a function that
758 : // starts with a line, has a gap, and then a second line, and
759 : // suppose that we enter an iteration with CURRENT at the end of
760 : // the first line. The next transition address is the start of
761 : // the second line, after the gap, so the iteration should
762 : // advance CURRENT to that point. At the head of that iteration,
763 : // the invariants require that the line iterator be pointing at
764 : // the second line. But this is also true at the head of the
765 : // next. And clearly, the iteration must not change the function
766 : // iterator. So neither iterator moves.
767 :
768 : // Assert the first invariant (see above).
769 4427189 : assert(!func || current < func->address || within(*func, current));
770 4427189 : assert(!line || current < line->address || within(*line, current));
771 :
772 : // The next transition after CURRENT.
773 : Module::Address next_transition;
774 :
775 : // Figure out which state we're in, add lines or warn, and compute
776 : // the next transition address.
777 4427189 : if (func && current >= func->address) {
778 4077579 : if (line && current >= line->address) {
779 : // Covered by both a line and a function.
780 4077211 : Module::Address func_left = func->size - (current - func->address);
781 4077211 : Module::Address line_left = line->size - (current - line->address);
782 : // This may overflow, but things work out.
783 4077211 : next_transition = current + std::min(func_left, line_left);
784 4077211 : Module::Line l = *line;
785 4077211 : l.address = current;
786 4077211 : l.size = next_transition - current;
787 4077211 : func->lines.push_back(l);
788 4077211 : last_line_used = line;
789 : } else {
790 : // Covered by a function, but no line.
791 368 : if (func != last_function_cited) {
792 368 : reporter->UncoveredFunction(*func);
793 368 : last_function_cited = func;
794 : }
795 368 : if (line && within(*func, line->address))
796 0 : next_transition = line->address;
797 : else
798 : // If this overflows, we'll catch it below.
799 368 : next_transition = func->address + func->size;
800 4077579 : }
801 : } else {
802 349610 : if (line && current >= line->address) {
803 : // Covered by a line, but no function.
804 : //
805 : // If GCC emits padding after one function to align the start
806 : // of the next, then it will attribute the padding
807 : // instructions to the last source line of function (to reduce
808 : // the size of the line number info), but omit it from the
809 : // DW_AT_{low,high}_pc range given in .debug_info (since it
810 : // costs nothing to be precise there). If we did use at least
811 : // some of the line we're about to skip, and it ends at the
812 : // start of the next function, then assume this is what
813 : // happened, and don't warn.
814 12404 : if (line != last_line_cited
815 : && !(func
816 : && line == last_line_used
817 12388 : && func->address - line->address == line->size)) {
818 7063 : reporter->UncoveredLine(*line);
819 7063 : last_line_cited = line;
820 : }
821 24808 : if (func && within(*line, func->address))
822 0 : next_transition = func->address;
823 : else
824 : // If this overflows, we'll catch it below.
825 12404 : next_transition = line->address + line->size;
826 : } else {
827 : // Covered by neither a function nor a line. By the invariant,
828 : // both func and line begin after CURRENT. The next transition
829 : // is the start of the next function or next line, whichever
830 : // is earliest.
831 337206 : assert (func || line);
832 337206 : if (func && line)
833 337206 : next_transition = std::min(func->address, line->address);
834 0 : else if (func)
835 0 : next_transition = func->address;
836 : else
837 0 : next_transition = line->address;
838 : }
839 : }
840 :
841 : // If a function or line abuts the end of the address space, then
842 : // next_transition may end up being zero, in which case we've completed
843 : // our pass. Handle that here, instead of trying to deal with it in
844 : // each place we compute next_transition.
845 4427189 : if (!next_transition)
846 0 : break;
847 :
848 : // Advance iterators as needed. If lines overlap or functions overlap,
849 : // then we could go around more than once. We don't worry too much
850 : // about what result we produce in that case, just as long as we don't
851 : // hang or crash.
852 19202747 : while (func_it != functions->end()
853 5040174 : && next_transition >= (*func_it)->address
854 4690580 : && !within(**func_it, next_transition))
855 617615 : func_it++;
856 4427189 : func = (func_it != functions->end()) ? *func_it : NULL;
857 29594495 : while (line_it != lines_.end()
858 8500013 : && next_transition >= line_it->address
859 8162666 : && !within(*line_it, next_transition))
860 4077438 : line_it++;
861 4427189 : line = (line_it != lines_.end()) ? &*line_it : NULL;
862 :
863 : // We must make progress.
864 4427189 : assert(next_transition > current);
865 4427189 : current = next_transition;
866 : }
867 : }
868 :
869 4773 : void DwarfCUToModule::Finish() {
870 : // Assembly language files have no function data, and that gives us
871 : // no place to store our line numbers (even though the GNU toolchain
872 : // will happily produce source line info for assembly language
873 : // files). To avoid spurious warnings about lines we can't assign
874 : // to functions, skip CUs in languages that lack functions.
875 4773 : if (!cu_context_->language->HasFunctions())
876 50 : return;
877 :
878 : // Read source line info, if we have any.
879 4723 : if (has_source_line_info_)
880 4723 : ReadSourceLines(source_line_offset_);
881 :
882 4723 : vector<Module::Function *> *functions = &cu_context_->functions;
883 :
884 : // Dole out lines to the appropriate functions.
885 4723 : AssignLinesToFunctions();
886 :
887 : // Add our functions, which now have source lines assigned to them,
888 : // to module_.
889 : cu_context_->file_context->module->AddFunctions(functions->begin(),
890 4723 : functions->end());
891 :
892 : // Ownership of the function objects has shifted from cu_context to
893 : // the Module.
894 4723 : functions->clear();
895 : }
896 :
897 4773 : bool DwarfCUToModule::StartCompilationUnit(uint64 offset,
898 : uint8 address_size,
899 : uint8 offset_size,
900 : uint64 cu_length,
901 : uint8 dwarf_version) {
902 4773 : return dwarf_version >= 2;
903 : }
904 :
905 4773 : bool DwarfCUToModule::StartRootDIE(uint64 offset, enum DwarfTag tag,
906 : const AttributeList& attrs) {
907 : // We don't deal with partial compilation units (the only other tag
908 : // likely to be used for root DIE).
909 4773 : return tag == dwarf2reader::DW_TAG_compile_unit;
910 : }
911 :
912 : } // namespace google_breakpad
|