1 : // -*- mode: c++ -*-
2 :
3 : // Copyright (c) 2010 Google Inc. All Rights Reserved.
4 : //
5 : // Redistribution and use in source and binary forms, with or without
6 : // modification, are permitted provided that the following conditions are
7 : // met:
8 : //
9 : // * Redistributions of source code must retain the above copyright
10 : // notice, this list of conditions and the following disclaimer.
11 : // * Redistributions in binary form must reproduce the above
12 : // copyright notice, this list of conditions and the following disclaimer
13 : // in the documentation and/or other materials provided with the
14 : // distribution.
15 : // * Neither the name of Google Inc. nor the names of its
16 : // contributors may be used to endorse or promote products derived from
17 : // this software without specific prior written permission.
18 : //
19 : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 :
31 : // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
32 :
33 : // stabs_reader.h: Define StabsReader, a parser for STABS debugging
34 : // information. A description of the STABS debugging format can be
35 : // found at:
36 : //
37 : // http://sourceware.org/gdb/current/onlinedocs/stabs_toc.html
38 : //
39 : // The comments here assume you understand the format.
40 : //
41 : // This parser can handle big-endian and little-endian data, and the symbol
42 : // values may be either 32 or 64 bits long. It handles both STABS in
43 : // sections (as used on Linux) and STABS appearing directly in an
44 : // a.out-like symbol table (as used in Darwin OS X Mach-O files).
45 :
46 : #ifndef COMMON_STABS_READER_H__
47 : #define COMMON_STABS_READER_H__
48 :
49 : #include <stddef.h>
50 : #include <stdint.h>
51 :
52 : #ifdef HAVE_A_OUT_H
53 : #include <a.out.h>
54 : #endif
55 : #ifdef HAVE_MACH_O_NLIST_H
56 : #include <mach-o/nlist.h>
57 : #endif
58 :
59 : #include <string>
60 : #include <vector>
61 :
62 : #include "common/byte_cursor.h"
63 :
64 : namespace google_breakpad {
65 :
66 : class StabsHandler;
67 :
68 0 : class StabsReader {
69 : public:
70 : // Create a reader for the STABS debug information whose .stab section is
71 : // being traversed by ITERATOR, and whose .stabstr section is referred to
72 : // by STRINGS. The reader will call the member functions of HANDLER to
73 : // report the information it finds, when the reader's 'Process' member
74 : // function is called.
75 : //
76 : // BIG_ENDIAN should be true if the entries in the .stab section are in
77 : // big-endian form, or false if they are in little-endian form.
78 : //
79 : // VALUE_SIZE should be either 4 or 8, indicating the size of the 'value'
80 : // field in each entry in bytes.
81 : //
82 : // UNITIZED should be true if the STABS data is stored in units with
83 : // N_UNDF headers. This is usually the case for STABS stored in sections,
84 : // like .stab/.stabstr, and usually not the case for STABS stored in the
85 : // actual symbol table; UNITIZED should be true when parsing Linux stabs,
86 : // false when parsing Mac OS X STABS. For details, see:
87 : // http://sourceware.org/gdb/current/onlinedocs/stabs/Stab-Section-Basics.html
88 : //
89 : // Note that, in ELF, the .stabstr section should be found using the
90 : // 'sh_link' field of the .stab section header, not by name.
91 : StabsReader(const uint8_t *stab, size_t stab_size,
92 : const uint8_t *stabstr, size_t stabstr_size,
93 : bool big_endian, size_t value_size, bool unitized,
94 : StabsHandler *handler);
95 :
96 : // Process the STABS data, calling the handler's member functions to
97 : // report what we find. While the handler functions return true,
98 : // continue to process until we reach the end of the section. If we
99 : // processed the entire section and all handlers returned true,
100 : // return true. If any handler returned false, return false.
101 : //
102 : // This is only meant to be called once per StabsReader instance;
103 : // resuming a prior processing pass that stopped abruptly isn't supported.
104 : bool Process();
105 :
106 : private:
107 :
108 : // An class for walking arrays of STABS entries. This isolates the main
109 : // STABS reader from the exact format (size; endianness) of the entries
110 : // themselves.
111 : class EntryIterator {
112 : public:
113 : // The contents of a STABS entry, adjusted for the host's endianness,
114 : // word size, 'struct nlist' layout, and so on.
115 : struct Entry {
116 : // True if this iterator has reached the end of the entry array. When
117 : // this is set, the other members of this structure are not valid.
118 : bool at_end;
119 :
120 : // The number of this entry within the list.
121 : size_t index;
122 :
123 : // The current entry's name offset. This is the offset within the
124 : // current compilation unit's strings, as establish by the N_UNDF entries.
125 : size_t name_offset;
126 :
127 : // The current entry's type, 'other' field, descriptor, and value.
128 : unsigned char type;
129 : unsigned char other;
130 : short descriptor;
131 : uint64_t value;
132 : };
133 :
134 : // Create a EntryIterator walking the entries in BUFFER. Treat the
135 : // entries as big-endian if BIG_ENDIAN is true, as little-endian
136 : // otherwise. Assume each entry has a 'value' field whose size is
137 : // VALUE_SIZE.
138 : //
139 : // This would not be terribly clean to extend to other format variations,
140 : // but it's enough to handle Linux and Mac, and we'd like STABS to die
141 : // anyway.
142 : //
143 : // For the record: on Linux, STABS entry values are always 32 bits,
144 : // regardless of the architecture address size (don't ask me why); on
145 : // Mac, they are 32 or 64 bits long. Oddly, the section header's entry
146 : // size for a Linux ELF .stab section varies according to the ELF class
147 : // from 12 to 20 even as the actual entries remain unchanged.
148 : EntryIterator(const ByteBuffer *buffer, bool big_endian, size_t value_size);
149 :
150 : // Move to the next entry. This function's behavior is undefined if
151 : // at_end() is true when it is called.
152 0 : EntryIterator &operator++() { Fetch(); entry_.index++; return *this; }
153 :
154 : // Dereferencing this iterator produces a reference to an Entry structure
155 : // that holds the current entry's values. The entry is owned by this
156 : // EntryIterator, and will be invalidated at the next call to operator++.
157 : const Entry &operator*() const { return entry_; }
158 0 : const Entry *operator->() const { return &entry_; }
159 :
160 : private:
161 : // Read the STABS entry at cursor_, and set entry_ appropriately.
162 : void Fetch();
163 :
164 : // The size of entries' value field, in bytes.
165 : size_t value_size_;
166 :
167 : // A byte cursor traversing buffer_.
168 : ByteCursor cursor_;
169 :
170 : // Values for the entry this iterator refers to.
171 : Entry entry_;
172 : };
173 :
174 : // A source line, saved to be reported later.
175 : struct Line {
176 : uint64_t address;
177 : const char *filename;
178 : int number;
179 : };
180 :
181 : // Return the name of the current symbol.
182 : const char *SymbolString();
183 :
184 : // Process a compilation unit starting at symbol_. Return true
185 : // to continue processing, or false to abort.
186 : bool ProcessCompilationUnit();
187 :
188 : // Process a function in current_source_file_ starting at symbol_.
189 : // Return true to continue processing, or false to abort.
190 : bool ProcessFunction();
191 :
192 : // The STABS entries we're parsing.
193 : ByteBuffer entries_;
194 :
195 : // The string section to which the entries refer.
196 : ByteBuffer strings_;
197 :
198 : // The iterator walking the STABS entries.
199 : EntryIterator iterator_;
200 :
201 : // True if the data is "unitized"; see the explanation in the comment for
202 : // StabsReader::StabsReader.
203 : bool unitized_;
204 :
205 : StabsHandler *handler_;
206 :
207 : // The offset of the current compilation unit's strings within stabstr_.
208 : size_t string_offset_;
209 :
210 : // The value string_offset_ should have for the next compilation unit,
211 : // as established by N_UNDF entries.
212 : size_t next_cu_string_offset_;
213 :
214 : // The current source file name.
215 : const char *current_source_file_;
216 :
217 : // Mac OS X STABS place SLINE records before functions; we accumulate a
218 : // vector of these until we see the FUN record, and then report them
219 : // after the StartFunction call.
220 : std::vector<Line> queued_lines_;
221 : };
222 :
223 : // Consumer-provided callback structure for the STABS reader. Clients
224 : // of the STABS reader provide an instance of this structure. The
225 : // reader then invokes the member functions of that instance to report
226 : // the information it finds.
227 : //
228 : // The default definitions of the member functions do nothing, and return
229 : // true so processing will continue.
class StabsHandler {
 public:
  StabsHandler() { }
  virtual ~StabsHandler() { }

  // Some general notes about the handler callback functions:

  // Processing proceeds until the end of the .stab section, or until
  // one of these functions returns false.

  // The addresses given are as reported in the STABS info, without
  // regard for whether the module may be loaded at different
  // addresses at different times (a shared library, say).  When
  // processing STABS from an ELF shared library, the addresses given
  // all assume the library is loaded at its nominal load address.
  // They are *not* offsets from the nominal load address.  If you
  // want offsets, you must subtract off the library's nominal load
  // address.

  // The arguments to these functions named FILENAME are all
  // references to strings stored in the .stabstr section.  Because
  // both the Linux and Solaris linkers factor out duplicate strings
  // from the .stabstr section, the consumer can assume that if two
  // FILENAME values are different addresses, they represent different
  // file names.
  //
  // Thus, it's safe to use (say) std::map<const char *, ...>, which
  // does string address comparisons, not string content comparisons.
  // Since all the strings are in the same array of characters --- the
  // .stabstr section --- comparing their addresses produces
  // predictable, if not lexicographically meaningful, results.

  // The default implementations below ignore their arguments and return
  // true, so that processing continues.  Their parameter names are
  // commented out so that merely including this header does not provoke
  // unused-parameter warnings in builds that enable them.

  // Begin processing a compilation unit whose main source file is
  // named FILENAME, and whose base address is ADDRESS.  If
  // BUILD_DIRECTORY is non-NULL, it is the name of the build
  // directory in which the compilation occurred.
  //
  // Return true to continue processing, or false to stop.
  virtual bool StartCompilationUnit(const char * /* filename */,
                                    uint64_t /* address */,
                                    const char * /* build_directory */) {
    return true;
  }

  // Finish processing the compilation unit.  If ADDRESS is non-zero,
  // it is the ending address of the compilation unit.  If ADDRESS is
  // zero, then the compilation unit's ending address is not
  // available, and the consumer must infer it by other means.
  //
  // Return true to continue processing, or false to stop.
  virtual bool EndCompilationUnit(uint64_t /* address */) { return true; }

  // Begin processing a function named NAME, whose starting address is
  // ADDRESS.  This function belongs to the compilation unit that was
  // most recently started but not ended.
  //
  // Note that, unlike filenames, NAME is not a pointer into the
  // .stabstr section; this is because the name as it appears in the
  // STABS data is followed by type information.  The value passed to
  // StartFunction is the function name alone.
  //
  // In languages that use name mangling, like C++, NAME is mangled.
  //
  // Return true to continue processing, or false to stop.
  virtual bool StartFunction(const std::string & /* name */,
                             uint64_t /* address */) {
    return true;
  }

  // Finish processing the function.  If ADDRESS is non-zero, it is
  // the ending address for the function.  If ADDRESS is zero, then
  // the function's ending address is not available, and the consumer
  // must infer it by other means.
  //
  // Return true to continue processing, or false to stop.
  virtual bool EndFunction(uint64_t /* address */) { return true; }

  // Report that the code at ADDRESS is attributable to line NUMBER of
  // the source file named FILENAME.  The caller must infer the ending
  // address of the line.
  //
  // Return true to continue processing, or false to stop.
  virtual bool Line(uint64_t /* address */, const char * /* filename */,
                    int /* number */) {
    return true;
  }

  // Report a warning.  FORMAT is a printf-like format string,
  // specifying how to format the subsequent arguments.  This is the
  // only member function without a default definition; every concrete
  // handler must supply one.
  virtual void Warning(const char *format, ...) = 0;
};
308 :
309 : } // namespace google_breakpad
310 :
311 : #endif // COMMON_STABS_READER_H__
|