1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 : /* ***** BEGIN LICENSE BLOCK *****
4 : * Version: ML 1.1/GPL 2.0/LGPL 2.1
5 : *
6 : * The contents of this file are subject to the Mozilla Public License Version
7 : * 1.1 (the "License"); you may not use this file except in compliance with
8 : * the License. You may obtain a copy of the License at
9 : * http://www.mozilla.org/MPL/
10 : *
11 : * Software distributed under the License is distributed on an "AS IS" basis,
12 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 : * for the specific language governing rights and limitations under the
14 : * License.
15 : *
16 : * The Original Code is Mozilla code.
17 : *
18 : * The Initial Developer of the Original Code is the Mozilla Corporation.
19 : * Portions created by the Initial Developer are Copyright (C) 2010
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : * Chris Double <chris.double@double.co.nz>
24 : * Chris Pearce <chris@pearce.org.nz>
25 : *
26 : * Alternatively, the contents of this file may be used under the terms of
27 : * either the GNU General Public License Version 2 or later (the "GPL"), or
28 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 : * in which case the provisions of the GPL or the LGPL are applicable instead
30 : * of those above. If you wish to allow use of your version of this file only
31 : * under the terms of either the GPL or the LGPL, and not to allow others to
32 : * use your version of this file under the terms of the MPL, indicate your
33 : * decision by deleting the provisions above and replace them with the notice
34 : * and other provisions required by the GPL or the LGPL. If you do not delete
35 : * the provisions above, a recipient may use your version of this file under
36 : * the terms of any one of the MPL, the GPL or the LGPL.
37 : *
38 : * ***** END LICENSE BLOCK ***** */
39 : #if !defined(nsOggCodecState_h_)
40 : #define nsOggCodecState_h_
41 :
42 : #include <ogg/ogg.h>
43 : #include <theora/theoradec.h>
44 : #ifdef MOZ_TREMOR
45 : #include <tremor/ivorbiscodec.h>
46 : #else
47 : #include <vorbis/codec.h>
48 : #endif
49 : #include <nsDeque.h>
50 : #include <nsTArray.h>
51 : #include <nsClassHashtable.h>
52 : #include "VideoUtils.h"
53 :
54 : #include "mozilla/StandardInteger.h"
55 :
// Defining the following enables validation that we're predicting the
// number of Vorbis samples in each packet correctly. Comment it out to
// disable the validation.
58 : #define VALIDATE_VORBIS_SAMPLE_CALCULATION
59 : #ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION
60 : #include <map>
61 : #endif
62 :
63 : // Deallocates a packet, used in nsPacketQueue below.
64 0 : class OggPacketDeallocator : public nsDequeFunctor {
65 0 : virtual void* operator() (void* aPacket) {
66 0 : ogg_packet* p = static_cast<ogg_packet*>(aPacket);
67 0 : delete [] p->packet;
68 : delete p;
69 0 : return nsnull;
70 : }
71 : };
72 :
73 : // A queue of ogg_packets. When we read a page, we extract the page's packets
74 : // and buffer them in the owning stream's nsOggCodecState. This is because
75 : // if we're skipping up to the next keyframe in very large frame sized videos,
76 : // there may be several megabytes of data between keyframes, and the
77 : // ogg_stream_state would end up resizing its buffer every time we added a
78 : // new 4KB page to the bitstream, which kills performance on Windows. This
79 : // also gives us the option to timestamp packets rather than decoded
80 : // frames/samples, reducing the amount of frames/samples we must decode to
81 : // determine start-time at a particular offset, and gives us finer control
82 : // over memory usage.
83 : class nsPacketQueue : private nsDeque {
84 : public:
85 0 : nsPacketQueue() : nsDeque(new OggPacketDeallocator()) {}
86 0 : ~nsPacketQueue() { Erase(); }
87 0 : bool IsEmpty() { return nsDeque::GetSize() == 0; }
88 : void Append(ogg_packet* aPacket);
89 0 : ogg_packet* PopFront() { return static_cast<ogg_packet*>(nsDeque::PopFront()); }
90 : ogg_packet* PeekFront() { return static_cast<ogg_packet*>(nsDeque::PeekFront()); }
91 : void PushFront(ogg_packet* aPacket) { nsDeque::PushFront(aPacket); }
92 : void PushBack(ogg_packet* aPacket) { nsDeque::PushFront(aPacket); }
93 0 : void Erase() { nsDeque::Erase(); }
94 : };
95 :
96 : // Encapsulates the data required for decoding an ogg bitstream and for
97 : // converting granulepos to timestamps.
98 : class nsOggCodecState {
99 : public:
100 : // Ogg types we know about
101 : enum CodecType {
102 : TYPE_VORBIS=0,
103 : TYPE_THEORA=1,
104 : TYPE_SKELETON=2,
105 : TYPE_UNKNOWN=3
106 : };
107 :
108 : virtual ~nsOggCodecState();
109 :
110 : // Factory for creating nsCodecStates. Use instead of constructor.
111 : // aPage should be a beginning-of-stream page.
112 : static nsOggCodecState* Create(ogg_page* aPage);
113 :
114 0 : virtual CodecType GetType() { return TYPE_UNKNOWN; }
115 :
116 : // Reads a header packet. Returns true when last header has been read.
117 0 : virtual bool DecodeHeader(ogg_packet* aPacket) {
118 0 : return (mDoneReadingHeaders = true);
119 : }
120 :
121 : // Returns the end time that a granulepos represents.
122 0 : virtual PRInt64 Time(PRInt64 granulepos) { return -1; }
123 :
124 : // Returns the start time that a granulepos represents.
125 0 : virtual PRInt64 StartTime(PRInt64 granulepos) { return -1; }
126 :
127 : // Initializes the codec state.
128 : virtual bool Init();
129 :
130 : // Returns true when this bitstream has finished reading all its
131 : // header packets.
132 0 : bool DoneReadingHeaders() { return mDoneReadingHeaders; }
133 :
134 : // Deactivates the bitstream. Only the primary video and audio bitstreams
135 : // should be active.
136 0 : void Deactivate() {
137 0 : mActive = false;
138 0 : mDoneReadingHeaders = true;
139 0 : Reset();
140 0 : }
141 :
142 : // Resets decoding state.
143 : virtual nsresult Reset();
144 :
145 : // Returns true if the nsOggCodecState thinks this packet is a header
146 : // packet. Note this does not verify the validity of the header packet,
147 : // it just guarantees that the packet is marked as a header packet (i.e.
148 : // it is definintely not a data packet). Do not use this to identify
149 : // streams, use it to filter header packets from data packets while
150 : // decoding.
151 0 : virtual bool IsHeader(ogg_packet* aPacket) { return false; }
152 :
153 : // Returns the next packet in the stream, or nsnull if there are no more
154 : // packets buffered in the packet queue. More packets can be buffered by
155 : // inserting one or more pages into the stream by calling PageIn(). The
156 : // caller is responsible for deleting returned packet's using
157 : // nsOggCodecState::ReleasePacket(). The packet will have a valid granulepos.
158 : ogg_packet* PacketOut();
159 :
160 : // Releases the memory used by a cloned packet. Every packet returned by
161 : // PacketOut() must be free'd using this function.
162 : static void ReleasePacket(ogg_packet* aPacket);
163 :
164 : // Extracts all packets from the page, and inserts them into the packet
165 : // queue. They can be extracted by calling PacketOut(). Packets from an
166 : // inactive stream are not buffered, i.e. this call has no effect for
167 : // inactive streams. Multiple pages may need to be inserted before
168 : // PacketOut() starts to return packets, as granulepos may need to be
169 : // captured.
170 : virtual nsresult PageIn(ogg_page* aPage);
171 :
172 : // Number of packets read.
173 : PRUint64 mPacketCount;
174 :
175 : // Serial number of the bitstream.
176 : PRUint32 mSerial;
177 :
178 : // Ogg specific state.
179 : ogg_stream_state mState;
180 :
181 : // Queue of as yet undecoded packets. Packets are guaranteed to have
182 : // a valid granulepos.
183 : nsPacketQueue mPackets;
184 :
185 : // Is the bitstream active; whether we're decoding and playing this bitstream.
186 : bool mActive;
187 :
188 : // True when all headers packets have been read.
189 : bool mDoneReadingHeaders;
190 :
191 : protected:
192 : // Constructs a new nsOggCodecState. aActive denotes whether the stream is
193 : // active. For streams of unsupported or unknown types, aActive should be
194 : // false.
195 : nsOggCodecState(ogg_page* aBosPage, bool aActive);
196 :
197 : // Deallocates all packets stored in mUnstamped, and clears the array.
198 : void ClearUnstamped();
199 :
200 : // Extracts packets out of mState until a data packet with a non -1
201 : // granulepos is encountered, or no more packets are readable. Header
202 : // packets are pushed into the packet queue immediately, and data packets
203 : // are buffered in mUnstamped. Once a non -1 granulepos packet is read
204 : // the granulepos of the packets in mUnstamped can be inferred, and they
205 : // can be pushed over to mPackets. Used by PageIn() implementations in
206 : // subclasses.
207 : nsresult PacketOutUntilGranulepos(bool& aFoundGranulepos);
208 :
209 : // Temporary buffer in which to store packets while we're reading packets
210 : // in order to capture granulepos.
211 : nsTArray<ogg_packet*> mUnstamped;
212 : };
213 :
214 : class nsVorbisState : public nsOggCodecState {
215 : public:
216 : nsVorbisState(ogg_page* aBosPage);
217 : virtual ~nsVorbisState();
218 :
219 0 : CodecType GetType() { return TYPE_VORBIS; }
220 : bool DecodeHeader(ogg_packet* aPacket);
221 : PRInt64 Time(PRInt64 granulepos);
222 : bool Init();
223 : nsresult Reset();
224 : bool IsHeader(ogg_packet* aPacket);
225 : nsresult PageIn(ogg_page* aPage);
226 :
227 : // Returns the end time that a granulepos represents.
228 : static PRInt64 Time(vorbis_info* aInfo, PRInt64 aGranulePos);
229 :
230 : vorbis_info mInfo;
231 : vorbis_comment mComment;
232 : vorbis_dsp_state mDsp;
233 : vorbis_block mBlock;
234 :
235 : private:
236 :
237 : // Reconstructs the granulepos of Vorbis packets stored in the mUnstamped
238 : // array.
239 : nsresult ReconstructVorbisGranulepos();
240 :
241 : // The "block size" of the previously decoded Vorbis packet, or 0 if we've
242 : // not yet decoded anything. This is used to calculate the number of samples
243 : // in a Vorbis packet, since each Vorbis packet depends on the previous
244 : // packet while being decoded.
245 : long mPrevVorbisBlockSize;
246 :
247 : // Granulepos (end sample) of the last decoded Vorbis packet. This is used
248 : // to calculate the Vorbis granulepos when we don't find a granulepos to
249 : // back-propagate from.
250 : PRInt64 mGranulepos;
251 :
252 : #ifdef VALIDATE_VORBIS_SAMPLE_CALCULATION
253 : // When validating that we've correctly predicted Vorbis packets' number
254 : // of samples, we store each packet's predicted number of samples in this
255 : // map, and verify we decode the predicted number of samples.
256 : std::map<ogg_packet*, long> mVorbisPacketSamples;
257 : #endif
258 :
259 : // Records that aPacket is predicted to have aSamples samples.
260 : // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION
261 : // is not defined.
262 : void RecordVorbisPacketSamples(ogg_packet* aPacket, long aSamples);
263 :
264 : // Verifies that aPacket has had its number of samples predicted.
265 : // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION
266 : // is not defined.
267 : void AssertHasRecordedPacketSamples(ogg_packet* aPacket);
268 :
269 : public:
270 : // Asserts that the number of samples predicted for aPacket is aSamples.
271 : // This function has no effect if VALIDATE_VORBIS_SAMPLE_CALCULATION
272 : // is not defined.
273 : void ValidateVorbisPacketSamples(ogg_packet* aPacket, long aSamples);
274 :
275 : };
276 :
277 : // Returns 1 if the Theora info struct is decoding a media of Theora
278 : // version (maj,min,sub) or later, otherwise returns 0.
279 : int TheoraVersion(th_info* info,
280 : unsigned char maj,
281 : unsigned char min,
282 : unsigned char sub);
283 :
284 : class nsTheoraState : public nsOggCodecState {
285 : public:
286 : nsTheoraState(ogg_page* aBosPage);
287 : virtual ~nsTheoraState();
288 :
289 0 : CodecType GetType() { return TYPE_THEORA; }
290 : bool DecodeHeader(ogg_packet* aPacket);
291 : PRInt64 Time(PRInt64 granulepos);
292 : PRInt64 StartTime(PRInt64 granulepos);
293 : bool Init();
294 : bool IsHeader(ogg_packet* aPacket);
295 : nsresult PageIn(ogg_page* aPage);
296 :
297 : // Returns the maximum number of microseconds which a keyframe can be offset
298 : // from any given interframe.
299 : PRInt64 MaxKeyframeOffset();
300 :
301 : // Returns the end time that a granulepos represents.
302 : static PRInt64 Time(th_info* aInfo, PRInt64 aGranulePos);
303 :
304 : th_info mInfo;
305 : th_comment mComment;
306 : th_setup_info *mSetup;
307 : th_dec_ctx* mCtx;
308 :
309 : float mPixelAspectRatio;
310 :
311 : private:
312 :
313 : // Reconstructs the granulepos of Theora packets stored in the
314 : // mUnstamped array. mUnstamped must be filled with consecutive packets from
315 : // the stream, with the last packet having a known granulepos. Using this
316 : // known granulepos, and the known frame numbers, we recover the granulepos
317 : // of all frames in the array. This enables us to determine their timestamps.
318 : void ReconstructTheoraGranulepos();
319 :
320 : };
321 :
// Packs a pair of 16 bit version numbers into a single 32 bit version
// number: the major version occupies the upper 16 bits, the minor version
// the lower 16 bits.
#define SKELETON_VERSION(aMajor, aMinor) (((aMajor) << 16) | (aMinor))
325 :
326 : class nsSkeletonState : public nsOggCodecState {
327 : public:
328 : nsSkeletonState(ogg_page* aBosPage);
329 : ~nsSkeletonState();
330 0 : CodecType GetType() { return TYPE_SKELETON; }
331 : bool DecodeHeader(ogg_packet* aPacket);
332 0 : PRInt64 Time(PRInt64 granulepos) { return -1; }
333 0 : bool Init() { return true; }
334 0 : bool IsHeader(ogg_packet* aPacket) { return true; }
335 :
336 : // Return true if the given time (in milliseconds) is within
337 : // the presentation time defined in the skeleton track.
338 0 : bool IsPresentable(PRInt64 aTime) { return aTime >= mPresentationTime; }
339 :
340 : // Stores the offset of the page on which a keyframe starts,
341 : // and its presentation time.
342 0 : class nsKeyPoint {
343 : public:
344 0 : nsKeyPoint()
345 : : mOffset(INT64_MAX),
346 0 : mTime(INT64_MAX) {}
347 :
348 0 : nsKeyPoint(PRInt64 aOffset, PRInt64 aTime)
349 : : mOffset(aOffset),
350 0 : mTime(aTime) {}
351 :
352 : // Offset from start of segment/link-in-the-chain in bytes.
353 : PRInt64 mOffset;
354 :
355 : // Presentation time in usecs.
356 : PRInt64 mTime;
357 :
358 0 : bool IsNull() {
359 : return mOffset == INT64_MAX &&
360 0 : mTime == INT64_MAX;
361 : }
362 : };
363 :
364 : // Stores a keyframe's byte-offset, presentation time and the serialno
365 : // of the stream it belongs to.
366 : class nsSeekTarget {
367 : public:
368 0 : nsSeekTarget() : mSerial(0) {}
369 : nsKeyPoint mKeyPoint;
370 : PRUint32 mSerial;
371 0 : bool IsNull() {
372 0 : return mKeyPoint.IsNull() &&
373 0 : mSerial == 0;
374 : }
375 : };
376 :
377 : // Determines from the seek index the keyframe which you must seek back to
378 : // in order to get all keyframes required to render all streams with
379 : // serialnos in aTracks, at time aTarget.
380 : nsresult IndexedSeekTarget(PRInt64 aTarget,
381 : nsTArray<PRUint32>& aTracks,
382 : nsSeekTarget& aResult);
383 :
384 0 : bool HasIndex() const {
385 0 : return mIndex.IsInitialized() && mIndex.Count() > 0;
386 : }
387 :
388 : // Returns the duration of the active tracks in the media, if we have
389 : // an index. aTracks must be filled with the serialnos of the active tracks.
390 : // The duration is calculated as the greatest end time of all active tracks,
391 : // minus the smalled start time of all the active tracks.
392 : nsresult GetDuration(const nsTArray<PRUint32>& aTracks, PRInt64& aDuration);
393 :
394 : private:
395 :
396 : // Decodes an index packet. Returns false on failure.
397 : bool DecodeIndex(ogg_packet* aPacket);
398 :
399 : // Gets the keypoint you must seek to in order to get the keyframe required
400 : // to render the stream at time aTarget on stream with serial aSerialno.
401 : nsresult IndexedSeekTargetForTrack(PRUint32 aSerialno,
402 : PRInt64 aTarget,
403 : nsKeyPoint& aResult);
404 :
405 : // Version of the decoded skeleton track, as per the SKELETON_VERSION macro.
406 : PRUint32 mVersion;
407 :
408 : // Presentation time of the resource in milliseconds
409 : PRInt64 mPresentationTime;
410 :
411 : // Length of the resource in bytes.
412 : PRInt64 mLength;
413 :
414 : // Stores the keyframe index and duration information for a particular
415 : // stream.
416 : class nsKeyFrameIndex {
417 : public:
418 :
419 0 : nsKeyFrameIndex(PRInt64 aStartTime, PRInt64 aEndTime)
420 : : mStartTime(aStartTime),
421 0 : mEndTime(aEndTime)
422 : {
423 0 : MOZ_COUNT_CTOR(nsKeyFrameIndex);
424 0 : }
425 :
426 0 : ~nsKeyFrameIndex() {
427 0 : MOZ_COUNT_DTOR(nsKeyFrameIndex);
428 0 : }
429 :
430 0 : void Add(PRInt64 aOffset, PRInt64 aTimeMs) {
431 0 : mKeyPoints.AppendElement(nsKeyPoint(aOffset, aTimeMs));
432 0 : }
433 :
434 0 : const nsKeyPoint& Get(PRUint32 aIndex) const {
435 0 : return mKeyPoints[aIndex];
436 : }
437 :
438 0 : PRUint32 Length() const {
439 0 : return mKeyPoints.Length();
440 : }
441 :
442 : // Presentation time of the first sample in this stream in usecs.
443 : const PRInt64 mStartTime;
444 :
445 : // End time of the last sample in this stream in usecs.
446 : const PRInt64 mEndTime;
447 :
448 : private:
449 : nsTArray<nsKeyPoint> mKeyPoints;
450 : };
451 :
452 : // Maps Ogg serialnos to the index-keypoint list.
453 : nsClassHashtable<nsUint32HashKey, nsKeyFrameIndex> mIndex;
454 : };
455 :
456 : #endif
|