DOSBox-X
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Defines
src/libs/decoders/mp3_seek_table.cpp
00001 /*
00002  *  DOSBox MP3 Seek Table Handler
00003  *  -----------------------------
00004  *
00005  * Problem:
00006  *          Seeking within an MP3 file to an exact time-offset, such as is expected
00007  *          within DOS games, is extremely difficult because the MP3 format doesn't
00008  *          provide a defined relationship between the compressed data stream positions
00009  *          versus decompressed PCM times.
00010  *
00011  * Solution:
00012  *          To solve this, we step through each compressed MP3 frames in
00013  *          the MP3 file (without decoding the actual audio) and keep a record of the
00014  *          decompressed "PCM" times for each frame.  We save this relationship to
00015  *          to a local fie, called a fast-seek look-up table, which we can quickly
00016  *          reuse every subsequent time we need to seek within the MP3 file.  This allows
00017  *          seeks to be performed extremely fast while being PCM-exact.
00018  *
00019  *          This "fast-seek" file can hold data for multiple MP3s to avoid
00020  *          creating an excessive number of files in the local working directory.
00021  *
00022  * Challenges:
00023  *       1. What happens if an MP3 file is changed but the MP3's filename remains the same?
00024  *
00025  *          The lookup table is indexed based on a checksum instead of filename.
00026  *          The checksum is calculated based on a subset of the MP3's content in
00027  *          addition to being seeded based on the MP3's size in bytes.
00028  *          This makes it very sensitive to changes in MP3 content; if a change
00029  *          is detected a new lookup table is generated.
00030  *
00031  *       2. Checksums can be weak, what if a collision happens?
00032  *
00033  *          To avoid the risk of collision, we use the current best-of-breed
00034  *          xxHash algorithm that has a quality-score of 10, the highest rating
00035  *          from the SMHasher test set.  See https://github.com/Cyan4973/xxHash
00036  *          for more details.
00037  *
00038  *       3. What happens if fast-seek file is brought from a little-endian
00039  *          machine to a big-endian machine (x86 or ARM to a PowerPC or Sun
00040  *          Sparc machine)?
00041  *
00042  *          The lookup table is serialized and multi-byte types are byte-swapped
00043  *          at runtime according to the architecture. This makes fast-seek files
00044  *          cross-compatible regardless of where they were written to or read from.
00045  *
00046  *       4. What happens if this code is updated to use a new fast-seek file
00047  *          format, but an old fast-seek file exists?
00048  *
00049  *          The seek-table file is versioned (see SEEK_TABLE_IDENTIFIER befow),
00050  *          therefore, if the format and version is updated, then the seek-table
00051  *          will be regenerated.
00052 
00053  * The seek table handler makes use of the following single-header public libraries:
00054  *   - dr_mp3:   http://mackron.github.io/dr_mp3.html, by David Reid
00055  *   - archive:  https://github.com/voidah/archive, by Arthur Ouellet
00056  *   - xxHash:   http://cyan4973.github.io/xxHash, by Yann Collet
00057  *
00058  *  Copyright (C) 2020       The DOSBox Team
00059  *  Copyright (C) 2018-2019  Kevin R. Croft <krcroft@gmail.com>
00060  *
00061  *  This program is free software; you can redistribute it and/or modify
00062  *  it under the terms of the GNU General Public License as published by
00063  *  the Free Software Foundation; either version 2 of the License, or
00064  *  (at your option) any later version.
00065  *
00066  *  This program is distributed in the hope that it will be useful,
00067  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00068  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00069  *  GNU General Public License for more details.
00070  *
00071  *  You should have received a copy of the GNU General Public License along
00072  *  with this program; if not, write to the Free Software Foundation, Inc.,
00073  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00074  */
00075 
00076 #if HAVE_CONFIG_H
00077 #  include <config.h>
00078 #endif
00079 
00080 // System headers
00081 #include <sys/stat.h>
00082 #include <fstream>
00083 #include <string>
00084 #include <map>
00085 
00086 // Local headers
00087 #include "support.h"
00088 
00089 #define XXH_INLINE_ALL
00090 #include "xxhash.h"
00091 // #include "../../../include/logging.h"
00092 #include "mp3_seek_table.h"
00093 
00094 // C++ scope modifiers
00095 using std::map;
00096 using std::vector;
00097 using std::string;
00098 using std::ios_base;
00099 using std::ifstream;
00100 using std::ofstream;
00101 
00102 // Identifies a valid versioned seek-table
00103 #define SEEK_TABLE_IDENTIFIER "st-v4"
00104 
00105 // How many compressed MP3 frames should we skip between each recorded
00106 // time point.  The trade-off is as follows:
00107 //   - a large number means slower in-game seeking but a smaller fast-seek file.
00108 //   - a smaller numbers (below 10) results in fast seeks on slow hardware.
00109 constexpr uint32_t FRAMES_PER_SEEK_POINT = 7;
00110 
00111 // Returns the size of a file in bytes (if valid), otherwise -1
00112 off_t get_file_size(const char* filename) {
00113     struct stat stat_buf;
00114     int rc = stat(filename, &stat_buf);
00115     return (rc >= 0) ? stat_buf.st_size : -1;
00116 }
00117 
00118 
00119 // Calculates a unique 64-bit hash (integer) from the provided file.
00120 // This function should not cause side-effects; ie, the current
00121 // read-position within the file should not be altered.
00122 //
00123 // This function tries to files as-close to the middle of the MP3 file as possible,
00124 // and use that feed the hash function in hopes of the most uniqueness.
00125 // We're trying to avoid content that might be duplicated across MP3s, like:
00126 // 1. ID3 tag filler content, which might be boiler plate or all empty
00127 // 2. Trailing silence or similar zero-PCM content
00128 //
00129 Uint64 calculate_stream_hash(struct SDL_RWops* const context) {
00130     // Save the current stream position, so we can restore it at the end of the function.
00131     const Sint64 original_pos = SDL_RWtell(context);
00132 
00133     // Seek to the end of the file so we can calculate the stream size.
00134     SDL_RWseek(context, 0, RW_SEEK_END);
00135 
00136     const Sint64 stream_size = SDL_RWtell(context);
00137     if (stream_size <= 0) {
00138         // LOG_MSG("MP3: get_stream_size returned %d, but should be positive", stream_size);
00139         return 0;
00140     }
00141 
00142     // Seek to the middle of the file while taking into account version small files.
00143     const Sint64 tail_size = (stream_size > 32768) ? 32768 : stream_size;
00144     const Sint64 mid_pos = static_cast<Sint64>(stream_size/2.0) - tail_size;
00145     SDL_RWseek(context, mid_pos >= 0 ? mid_pos : 0, RW_SEEK_SET);
00146 
00147     // Prepare our read buffer and counter:
00148     vector<char> buffer(1024, 0);
00149     size_t total_bytes_read = 0;
00150 
00151     // Initialize xxHash's state using the stream_size as our seed.
00152     // Seeding with the stream_size provide a second level of uniqueness
00153     // in the unlikely scenario that two files of different length happen to
00154     // have the same trailing 32KB of content.  The different seeds will produce
00155     // unique hashes.
00156     XXH64_state_t* const state = XXH64_createState();
00157     if(!state) {
00158         return 0;
00159     }
00160 
00161     const uint64_t seed = static_cast<uint64_t>(stream_size);
00162     XXH64_reset(state, seed);
00163 
00164     while (total_bytes_read < static_cast<size_t>(tail_size)) {
00165         // Read a chunk of data.
00166         const size_t bytes_read = SDL_RWread(context, buffer.data(), 1, buffer.size());
00167 
00168         if (bytes_read != 0) {
00169             // Update our hash if we read data.
00170             XXH64_update(state, buffer.data(), bytes_read);
00171             total_bytes_read += bytes_read;
00172         } else {
00173             break;
00174         }
00175     }
00176 
00177     // restore the stream position
00178     SDL_RWseek(context, original_pos, RW_SEEK_SET);
00179 
00180     const Uint64 hash = XXH64_digest(state);
00181     XXH64_freeState(state);
00182     return hash;
00183 }
00184 
00185 // This function generates a new seek-table for a given mp3 stream and writes
00186 // the data to the fast-seek file.
00187 //
00188 Uint64 generate_new_seek_points(const char* filename,
00189                                 const Uint64& stream_hash,
00190                                 drmp3* const p_dr,
00191                                 map<Uint64, vector<drmp3_seek_point_serial> >& seek_points_table,
00192                                 map<Uint64, drmp3_uint64>& pcm_frame_count_table,
00193                                 vector<drmp3_seek_point_serial>& seek_points_vector) {
00194 
00195     // Initialize our frame counters with zeros.
00196     drmp3_uint64 mp3_frame_count(0);
00197     drmp3_uint64 pcm_frame_count(0);
00198 
00199     // Get the number of compressed MP3 frames and the number of uncompressed PCM frames.
00200     drmp3_bool32 result = drmp3_get_mp3_and_pcm_frame_count(p_dr,
00201                                                             &mp3_frame_count,
00202                                                             &pcm_frame_count);
00203 
00204     if (   result != DRMP3_TRUE
00205         || mp3_frame_count < FRAMES_PER_SEEK_POINT
00206         || pcm_frame_count < FRAMES_PER_SEEK_POINT) {
00207         // LOG_MSG("MP3: failed to determine or find sufficient mp3 and pcm frames");
00208         return 0;
00209     }
00210 
00211     // Based on the number of frames found in the file, we size our seek-point
00212     // vector accordingly. We then pass our sized vector into dr_mp3 which populates
00213     // the decoded PCM times.
00214     // We also take into account the desired number of "FRAMES_PER_SEEK_POINT",
00215     // which is defined above.
00216     drmp3_uint32 num_seek_points = static_cast<drmp3_uint32>
00217         (ceil_udivide(mp3_frame_count, FRAMES_PER_SEEK_POINT));
00218 
00219     seek_points_vector.resize(num_seek_points);
00220     result = drmp3_calculate_seek_points(p_dr,
00221                                          &num_seek_points,
00222                                          reinterpret_cast<drmp3_seek_point*>(seek_points_vector.data()));
00223 
00224     if (result != DRMP3_TRUE || num_seek_points == 0) {
00225         // LOG_MSG("MP3: failed to calculate sufficient seek points for stream");
00226         return 0;
00227     }
00228 
00229     // The calculate function provides us with the actual number of generated seek
00230     // points in the num_seek_points variable; so if this differs from expected then we
00231     // need to resize (ie: shrink) our vector.
00232     if (num_seek_points != seek_points_vector.size()) {
00233         seek_points_vector.resize(num_seek_points);
00234     }
00235 
00236     // Update our lookup table file with the new seek points and pcm_frame_count.
00237     // Note: the serializer elegantly handles C++ STL objects and is endian-safe.
00238     seek_points_table[stream_hash] = seek_points_vector;
00239     pcm_frame_count_table[stream_hash] = pcm_frame_count;
00240     ofstream outfile(filename, ios_base::trunc | ios_base::binary);
00241 
00242     // Caching our seek table to file is optional.  If the user is blocked due to
00243     // security or write-access issues, then this write-phase is skipped. In this
00244     // scenario the seek table will be generated on-the-fly on every start of DOSBox.
00245     if (outfile.is_open()) {
00246         Archive<ofstream> serialize(outfile);
00247         serialize << SEEK_TABLE_IDENTIFIER << seek_points_table << pcm_frame_count_table;
00248         outfile.close();
00249     }
00250 
00251     // Finally, we return the number of decoded PCM frames for this given file, which
00252     // doubles as a success-code.
00253     return pcm_frame_count;
00254 }
00255 
00256 // This function attempts to fetch a seek-table for a given mp3 stream from the fast-seek file.
00257 // If anything is amiss then this function fails.
00258 //
00259 Uint64 load_existing_seek_points(const char* filename,
00260                                  const Uint64& stream_hash,
00261                                  map<Uint64, vector<drmp3_seek_point_serial> >& seek_points_table,
00262                                  map<Uint64, drmp3_uint64>& pcm_frame_count_table,
00263                                  vector<drmp3_seek_point_serial>& seek_points) {
00264 
00265     // The below sentinals sanity check and read the incoming
00266     // file one-by-one until all the data can be trusted.
00267 
00268     // Sentinal 1: bail if we got a zero-byte file.
00269     struct stat buffer;
00270     if (stat(filename, &buffer) != 0) {
00271         return 0;
00272     }
00273 
00274     // Sentinal 2: Bail if the file isn't big enough to hold our identifier.
00275     if (get_file_size(filename) < static_cast<int64_t>(sizeof(SEEK_TABLE_IDENTIFIER))) {
00276         return 0;
00277     }
00278 
00279     // Sentinal 3: Bail if we don't get a matching identifier.
00280     string fetched_identifier;
00281     ifstream infile(filename, ios_base::binary);
00282     Archive<ifstream> deserialize(infile);
00283     deserialize >> fetched_identifier;
00284     if (fetched_identifier != SEEK_TABLE_IDENTIFIER) {
00285         infile.close();
00286         return 0;
00287     }
00288 
00289     // De-serialize the seek point and pcm_count tables.
00290     deserialize >> seek_points_table >> pcm_frame_count_table;
00291     infile.close();
00292 
00293     // Sentinal 4: does the seek_points table have our stream's hash?
00294     const auto p_seek_points = seek_points_table.find(stream_hash);
00295     if (p_seek_points == seek_points_table.end()) {
00296         return 0;
00297     }
00298 
00299     // Sentinal 5: does the pcm_frame_count table have our stream's hash?
00300     const auto p_pcm_frame_count = pcm_frame_count_table.find(stream_hash);
00301     if (p_pcm_frame_count == pcm_frame_count_table.end()) {
00302         return 0;
00303     }
00304 
00305     // If we made it here, the file was valid and has lookup-data for our
00306     // our desired stream
00307     seek_points = p_seek_points->second;
00308     return p_pcm_frame_count->second;
00309 }
00310 
00311 // This function attempts to populate our seek table for the given mp3 stream, first
00312 // attempting to read it from the fast-seek file and (if it can't be read for any reason), it
00313 // calculates new data.  It makes use of the above two functions.
00314 //
00315 uint64_t populate_seek_points(struct SDL_RWops* const context,
00316                               mp3_t* p_mp3,
00317                               const char* seektable_filename,
00318                               bool &result) {
00319 
00320     // assume failure until proven otherwise
00321     result = false;
00322 
00323     // Calculate the stream's xxHash value.
00324     Uint64 stream_hash = calculate_stream_hash(context);
00325     if (stream_hash == 0) {
00326         // LOG_MSG("MP3: could not compute the hash of the stream");
00327         return 0;
00328     }
00329 
00330     // Attempt to fetch the seek points and pcm count from an existing look up table file.
00331     map<Uint64, vector<drmp3_seek_point_serial> > seek_points_table;
00332     map<Uint64, drmp3_uint64> pcm_frame_count_table;
00333     drmp3_uint64 pcm_frame_count = load_existing_seek_points(seektable_filename,
00334                                                              stream_hash,
00335                                                              seek_points_table,
00336                                                              pcm_frame_count_table,
00337                                                              p_mp3->seek_points_vector);
00338 
00339     // Otherwise calculate new seek points and save them to the fast-seek file.
00340     if (pcm_frame_count == 0) {
00341         pcm_frame_count = generate_new_seek_points(seektable_filename,
00342                                                    stream_hash,
00343                                                    p_mp3->p_dr,
00344                                                    seek_points_table,
00345                                                    pcm_frame_count_table,
00346                                                    p_mp3->seek_points_vector);
00347         if (pcm_frame_count == 0) {
00348             // LOG_MSG("MP3: could not load existing or generate new seek points for the stream");
00349             return 0;
00350         }
00351     }
00352 
00353     // Finally, regardless of which scenario succeeded above, we now have our seek points!
00354     // We bind our seek points to the dr_mp3 object which will be used for fast seeking.
00355     if(drmp3_bind_seek_table(p_mp3->p_dr,
00356                              static_cast<uint32_t>(p_mp3->seek_points_vector.size()),
00357                              reinterpret_cast<drmp3_seek_point*>(p_mp3->seek_points_vector.data()))
00358                              != DRMP3_TRUE) {
00359         return 0;
00360     }
00361 
00362     result = true;
00363     return pcm_frame_count;
00364 }