The Lean Mean C++ Option Parser
src/optionparser.h
Go to the documentation of this file.
00001 /*
00002  * The Lean Mean C++ Option Parser
00003  *
00004  * Copyright (C) 2012 Matthias S. Benkmann
00005  *
00006  * The "Software" in the following 2 paragraphs refers to this file containing
00007  * the code to The Lean Mean C++ Option Parser.
00008  * The "Software" does NOT refer to any other files which you
00009  * may have received alongside this file (e.g. as part of a larger project that
00010  * incorporates The Lean Mean C++ Option Parser).
00011  *
00012  * Permission is hereby granted, free of charge, to any person obtaining a copy
00013  * of this software, to deal in the Software without restriction, including
00014  * without limitation the rights to use, copy, modify, merge, publish,
00015  * distribute, sublicense, and/or sell copies of the Software, and to permit
00016  * persons to whom the Software is furnished to do so, subject to the following
00017  * conditions:
00018  * The above copyright notice and this permission notice shall be included in
00019  * all copies or substantial portions of the Software.
00020  *
00021  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00022  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00023  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00024  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00025  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00026  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
00027  * SOFTWARE.
00028  */
00029 
00030 /*
00031  * NOTE: It is recommended that you read the processed HTML doxygen documentation
00032  * rather than this source. If you don't know doxygen, it's like javadoc for C++.
00033  * If you don't want to install doxygen you can find a copy of the processed
00034  * documentation at
00035  *
00036  * http://optionparser.sourceforge.net/
00037  *
00038  */
00039 
00216 #ifndef OPTIONPARSER_H_
00217 #define OPTIONPARSER_H_
00218 
00220 namespace option
00221 {
00222 
#ifdef _MSC_VER
// NOTE(review): this #include sits inside namespace option because the
// enclosing namespace is opened before this block; consider moving it in
// front of the namespace.
#include <intrin.h>
#pragma intrinsic(_BitScanReverse)
/**
 * @brief Emulation of GCC's __builtin_clz() for Microsoft compilers.
 */
struct MSC_Builtin_CLZ
{
  /**
   * @brief Returns the number of leading 0-bits in @c x, counted from the
   * most significant bit of a 32-bit unsigned.
   *
   * _BitScanReverse() reports the bit index (0..31) of the highest set bit,
   * so the number of leading zeros is <code>31 - index</code>. For
   * <code>x == 0</code> _BitScanReverse() does not set @c index; 32 is
   * returned in that case. Callers must not use that value as a shift count
   * for a 32-bit operand.
   */
  static int builtin_clz(unsigned x)
  {
    if (x == 0)
      return 32; // no bit set: every one of the 32 bits is a leading zero

    unsigned long index;
    _BitScanReverse(&index, x);
    return 31 - static_cast<int>(index); // int is always 32bit on Windows, even for target x64
  }
};
#define __builtin_clz(x) MSC_Builtin_CLZ::builtin_clz(x)
#endif
00237 
00238 class Option;
00239 
/**
 * @brief Result of a CheckArg callback for a potential option argument.
 *
 * The parser reacts to the values as follows:
 * @li ARG_NONE and ARG_IGNORE: the option is stored without an argument.
 * @li ARG_OK: the argument is accepted; a separate argument is consumed.
 * @li ARG_ILLEGAL: parsing is aborted with an error.
 */
enum ArgStatus
{
  //! The option does not take an argument.
  ARG_NONE = 0,
  //! The argument is acceptable for the option.
  ARG_OK = 1,
  //! The argument is not acceptable, but that is not fatal; it is dropped.
  ARG_IGNORE = 2,
  //! The argument is not acceptable and that is fatal.
  ARG_ILLEGAL = 3
};
00257 
00286 typedef ArgStatus (*CheckArg)(const Option& option, bool msg);
00287 
00310 struct Descriptor
00311 {
00331   const unsigned index;
00332 
00340   const int type;
00341 
00353   const char* const shortopt;
00354 
00388   const char* const longopt;
00389 
00400   const CheckArg check_arg;
00401 
00417   const char* help;
00418 };
00419 
00437 class Option
00438 {
00439   Option* next_;
00440   Option* prev_;
00441 public:
00460   const Descriptor* desc;
00461 
00476   const char* name;
00477 
00484   const char* arg;
00485 
00505   int namelen;
00506 
00528   int type() const
00529   {
00530     return desc == 0 ? 0 : desc->type;
00531   }
00532 
00537   int index() const
00538   {
00539     return desc == 0 ? -1 : (int)desc->index;
00540   }
00541 
00554   int count()
00555   {
00556     int c = (desc == 0 ? 0 : 1);
00557     Option* p = first();
00558     while (!p->isLast())
00559     {
00560       ++c;
00561       p = p->next_;
00562     };
00563     return c;
00564   }
00565 
00574   bool isFirst() const
00575   {
00576     return isTagged(prev_);
00577   }
00578 
00587   bool isLast() const
00588   {
00589     return isTagged(next_);
00590   }
00591 
00603   Option* first()
00604   {
00605     Option* p = this;
00606     while (!p->isFirst())
00607       p = p->prev_;
00608     return p;
00609   }
00610 
00627   Option* last()
00628   {
00629     return first()->prevwrap();
00630   }
00631 
00640   Option* prev()
00641   {
00642     return isFirst() ? 0 : prev_;
00643   }
00644 
00653   Option* prevwrap()
00654   {
00655     return untag(prev_);
00656   }
00657 
00666   Option* next()
00667   {
00668     return isLast() ? 0 : next_;
00669   }
00670 
00679   Option* nextwrap()
00680   {
00681     return untag(next_);
00682   }
00683 
00694   void append(Option* new_last)
00695   {
00696     Option* p = last();
00697     Option* f = first();
00698     p->next_ = new_last;
00699     new_last->prev_ = p;
00700     new_last->next_ = tag(f);
00701     f->prev_ = tag(new_last);
00702   }
00703 
00720   operator const Option*() const
00721   {
00722     return desc ? this : 0;
00723   }
00724 
00741   operator Option*()
00742   {
00743     return desc ? this : 0;
00744   }
00745 
00750   Option() :
00751       desc(0), name(0), arg(0), namelen(0)
00752   {
00753     prev_ = tag(this);
00754     next_ = tag(this);
00755   }
00756 
00765   Option(const Descriptor* desc_, const char* name_, const char* arg_)
00766   {
00767     init(desc_, name_, arg_);
00768   }
00769 
00775   void operator=(const Option& orig)
00776   {
00777     init(orig.desc, orig.name, orig.arg);
00778   }
00779 
00785   Option(const Option& orig)
00786   {
00787     init(orig.desc, orig.name, orig.arg);
00788   }
00789 
00790 private:
00799   void init(const Descriptor* desc_, const char* name_, const char* arg_)
00800   {
00801     desc = desc_;
00802     name = name_;
00803     arg = arg_;
00804     prev_ = tag(this);
00805     next_ = tag(this);
00806     namelen = 0;
00807     if (name == 0)
00808       return;
00809     namelen = 1;
00810     if (name[0] != '-')
00811       return;
00812     while (name[namelen] != 0 && name[namelen] != '=')
00813       ++namelen;
00814   }
00815 
00816   static Option* tag(Option* ptr)
00817   {
00818     return (Option*) ((unsigned long long) ptr | 1);
00819   }
00820 
00821   static Option* untag(Option* ptr)
00822   {
00823     return (Option*) ((unsigned long long) ptr & ~1ull);
00824   }
00825 
00826   static bool isTagged(Option* ptr)
00827   {
00828     return ((unsigned long long) ptr & 1);
00829   }
00830 };
00831 
00886 struct Arg
00887 {
00889   static ArgStatus None(const Option&, bool)
00890   {
00891     return ARG_NONE;
00892   }
00893 
00895   static ArgStatus Optional(const Option& option, bool)
00896   {
00897     if (option.arg && option.name[option.namelen] != 0)
00898       return ARG_OK;
00899     else
00900       return ARG_IGNORE;
00901   }
00902 };
00903 
00913 struct Stats
00914 {
00924   unsigned buffer_max;
00925 
00937   unsigned options_max;
00938 
00942   Stats() :
00943       buffer_max(1), options_max(1) // 1 more than necessary as sentinel
00944   {
00945   }
00946 
00956   Stats(bool gnu, const Descriptor usage[], int argc, const char** argv, int min_abbr_len = 0, //
00957         bool single_minus_longopt = false) :
00958       buffer_max(1), options_max(1) // 1 more than necessary as sentinel
00959   {
00960     add(gnu, usage, argc, argv, min_abbr_len, single_minus_longopt);
00961   }
00962 
00964   Stats(bool gnu, const Descriptor usage[], int argc, char** argv, int min_abbr_len = 0, //
00965         bool single_minus_longopt = false) :
00966       buffer_max(1), options_max(1) // 1 more than necessary as sentinel
00967   {
00968     add(gnu, usage, argc, (const char**) argv, min_abbr_len, single_minus_longopt);
00969   }
00970 
00972   Stats(const Descriptor usage[], int argc, const char** argv, int min_abbr_len = 0, //
00973         bool single_minus_longopt = false) :
00974       buffer_max(1), options_max(1) // 1 more than necessary as sentinel
00975   {
00976     add(false, usage, argc, argv, min_abbr_len, single_minus_longopt);
00977   }
00978 
00980   Stats(const Descriptor usage[], int argc, char** argv, int min_abbr_len = 0, //
00981         bool single_minus_longopt = false) :
00982       buffer_max(1), options_max(1) // 1 more than necessary as sentinel
00983   {
00984     add(false, usage, argc, (const char**) argv, min_abbr_len, single_minus_longopt);
00985   }
00986 
00996   void add(bool gnu, const Descriptor usage[], int argc, const char** argv, int min_abbr_len = 0, //
00997            bool single_minus_longopt = false);
00998 
01000   void add(bool gnu, const Descriptor usage[], int argc, char** argv, int min_abbr_len = 0, //
01001            bool single_minus_longopt = false)
01002   {
01003     add(gnu, usage, argc, (const char**) argv, min_abbr_len, single_minus_longopt);
01004   }
01005 
01007   void add(const Descriptor usage[], int argc, const char** argv, int min_abbr_len = 0, //
01008            bool single_minus_longopt = false)
01009   {
01010     add(false, usage, argc, argv, min_abbr_len, single_minus_longopt);
01011   }
01012 
01014   void add(const Descriptor usage[], int argc, char** argv, int min_abbr_len = 0, //
01015            bool single_minus_longopt = false)
01016   {
01017     add(false, usage, argc, (const char**) argv, min_abbr_len, single_minus_longopt);
01018   }
01019 private:
01020   class CountOptionsAction;
01021 };
01022 
01043 class Parser
01044 {
01045   int op_count; 
01046   int nonop_count; 
01047   const char** nonop_args; 
01048   bool err; 
01049 public:
01050 
01054   Parser() :
01055       op_count(0), nonop_count(0), nonop_args(0), err(false)
01056   {
01057   }
01058 
01063   Parser(bool gnu, const Descriptor usage[], int argc, const char** argv, Option options[], Option buffer[],
01064          int min_abbr_len = 0, bool single_minus_longopt = false, int bufmax = -1) :
01065       op_count(0), nonop_count(0), nonop_args(0), err(false)
01066   {
01067     parse(gnu, usage, argc, argv, options, buffer, min_abbr_len, single_minus_longopt, bufmax);
01068   }
01069 
01071   Parser(bool gnu, const Descriptor usage[], int argc, char** argv, Option options[], Option buffer[],
01072          int min_abbr_len = 0, bool single_minus_longopt = false, int bufmax = -1) :
01073       op_count(0), nonop_count(0), nonop_args(0), err(false)
01074   {
01075     parse(gnu, usage, argc, (const char**) argv, options, buffer, min_abbr_len, single_minus_longopt, bufmax);
01076   }
01077 
01079   Parser(const Descriptor usage[], int argc, const char** argv, Option options[], Option buffer[], int min_abbr_len = 0,
01080          bool single_minus_longopt = false, int bufmax = -1) :
01081       op_count(0), nonop_count(0), nonop_args(0), err(false)
01082   {
01083     parse(false, usage, argc, argv, options, buffer, min_abbr_len, single_minus_longopt, bufmax);
01084   }
01085 
01087   Parser(const Descriptor usage[], int argc, char** argv, Option options[], Option buffer[], int min_abbr_len = 0,
01088          bool single_minus_longopt = false, int bufmax = -1) :
01089       op_count(0), nonop_count(0), nonop_args(0), err(false)
01090   {
01091     parse(false, usage, argc, (const char**) argv, options, buffer, min_abbr_len, single_minus_longopt, bufmax);
01092   }
01093 
01150   void parse(bool gnu, const Descriptor usage[], int argc, const char** argv, Option options[], Option buffer[],
01151              int min_abbr_len = 0, bool single_minus_longopt = false, int bufmax = -1);
01152 
01154   void parse(bool gnu, const Descriptor usage[], int argc, char** argv, Option options[], Option buffer[],
01155              int min_abbr_len = 0, bool single_minus_longopt = false, int bufmax = -1)
01156   {
01157     parse(gnu, usage, argc, (const char**) argv, options, buffer, min_abbr_len, single_minus_longopt, bufmax);
01158   }
01159 
01161   void parse(const Descriptor usage[], int argc, const char** argv, Option options[], Option buffer[],
01162              int min_abbr_len = 0, bool single_minus_longopt = false, int bufmax = -1)
01163   {
01164     parse(false, usage, argc, argv, options, buffer, min_abbr_len, single_minus_longopt, bufmax);
01165   }
01166 
01168   void parse(const Descriptor usage[], int argc, char** argv, Option options[], Option buffer[], int min_abbr_len = 0,
01169              bool single_minus_longopt = false, int bufmax = -1)
01170   {
01171     parse(false, usage, argc, (const char**) argv, options, buffer, min_abbr_len, single_minus_longopt, bufmax);
01172   }
01173 
01183   int optionsCount()
01184   {
01185     return op_count;
01186   }
01187 
01202   int nonOptionsCount()
01203   {
01204     return nonop_count;
01205   }
01206 
01218   const char** nonOptions()
01219   {
01220     return nonop_args;
01221   }
01222 
01226   const char* nonOption(int i)
01227   {
01228     return nonOptions()[i];
01229   }
01230 
01246   bool error()
01247   {
01248     return err;
01249   }
01250 
01251 private:
01252   friend struct Stats;
01253   class StoreOptionAction;
01254   struct Action;
01255 
01261   static bool workhorse(bool gnu, const Descriptor usage[], int numargs, const char** args, Action& action,
01262                         bool single_minus_longopt, bool print_errors, int min_abbr_len);
01263 
01278   static bool streq(const char* st1, const char* st2)
01279   {
01280     while (*st1 != 0)
01281       if (*st1++ != *st2++)
01282         return false;
01283     return (*st2 == 0 || *st2 == '=');
01284   }
01285 
01310   static bool streqabbr(const char* st1, const char* st2, long long min)
01311   {
01312     const char* st1start = st1;
01313     while (*st1 != 0 && (*st1 == *st2))
01314     {
01315       ++st1;
01316       ++st2;
01317     }
01318 
01319     return (*st1 == 0 || (min > 0 && (st1 - st1start) >= min)) && (*st2 == 0 || *st2 == '=');
01320   }
01321 
01328   static bool instr(char ch, const char* st)
01329   {
01330     while (*st != 0 && *st != ch)
01331       ++st;
01332     return *st == ch;
01333   }
01334 
01340   static void shift(const char** args, int count)
01341   {
01342     for (int i = 0; i > -count; --i)
01343     {
01344       const char* temp = args[i];
01345       args[i] = args[i - 1];
01346       args[i - 1] = temp;
01347     }
01348   }
01349 };
01350 
01356 struct Parser::Action
01357 {
01366   virtual bool perform(Option&)
01367   {
01368     return true;
01369   }
01370 
01379   virtual bool finished(int numargs, const char** args)
01380   {
01381     (void) numargs;
01382     (void) args;
01383     return true;
01384   }
01385 };
01386 
01392 class Stats::CountOptionsAction: public Parser::Action
01393 {
01394   unsigned* buffer_max;
01395 public:
01400   CountOptionsAction(unsigned* buffer_max_) :
01401       buffer_max(buffer_max_)
01402   {
01403   }
01404 
01405   bool perform(Option&)
01406   {
01407     if (*buffer_max == 0x7fffffff)
01408       return false; // overflow protection: don't accept number of options that doesn't fit signed int
01409     ++*buffer_max;
01410     return true;
01411   }
01412 };
01413 
01419 class Parser::StoreOptionAction: public Parser::Action
01420 {
01421   Parser& parser;
01422   Option* options;
01423   Option* buffer;
01424   int bufmax; 
01425 public:
01433   StoreOptionAction(Parser& parser_, Option options_[], Option buffer_[], int bufmax_) :
01434       parser(parser_), options(options_), buffer(buffer_), bufmax(bufmax_)
01435   {
01436     // find first empty slot in buffer (if any)
01437     int bufidx = 0;
01438     while ((bufmax < 0 || bufidx < bufmax) && buffer[bufidx])
01439       ++bufidx;
01440 
01441     // set parser's optionCount
01442     parser.op_count = bufidx;
01443   }
01444 
01445   bool perform(Option& option)
01446   {
01447     if (bufmax < 0 || parser.op_count < bufmax)
01448     {
01449       if (parser.op_count == 0x7fffffff)
01450         return false; // overflow protection: don't accept number of options that doesn't fit signed int
01451 
01452       buffer[parser.op_count] = option;
01453       int idx = buffer[parser.op_count].desc->index;
01454       if (options[idx])
01455         options[idx].append(buffer[parser.op_count]);
01456       else
01457         options[idx] = buffer[parser.op_count];
01458       ++parser.op_count;
01459     }
01460     return true; // NOTE: an option that is discarded because of a full buffer is not fatal
01461   }
01462 
01463   bool finished(int numargs, const char** args)
01464   {
01465     // only overwrite non-option argument list if there's at least 1
01466     // new non-option argument. Otherwise we keep the old list. This
01467     // makes it easy to use default non-option arguments.
01468     if (numargs > 0)
01469     {
01470       parser.nonop_count = numargs;
01471       parser.nonop_args = args;
01472     }
01473 
01474     return true;
01475   }
01476 };
01477 
01478 inline void Parser::parse(bool gnu, const Descriptor usage[], int argc, const char** argv, Option options[],
01479                           Option buffer[], int min_abbr_len, bool single_minus_longopt, int bufmax)
01480 {
01481   StoreOptionAction action(*this, options, buffer, bufmax);
01482   err = !workhorse(gnu, usage, argc, argv, action, single_minus_longopt, true, min_abbr_len);
01483 }
01484 
01485 inline void Stats::add(bool gnu, const Descriptor usage[], int argc, const char** argv, int min_abbr_len,
01486                        bool single_minus_longopt)
01487 {
01488   // determine size of options array. This is the greatest index used in the usage + 1
01489   int i = 0;
01490   while (usage[i].shortopt != 0)
01491   {
01492     if (usage[i].index + 1 >= options_max)
01493       options_max = (usage[i].index + 1) + 1; // 1 more than necessary as sentinel
01494 
01495     ++i;
01496   }
01497 
01498   CountOptionsAction action(&buffer_max);
01499   Parser::workhorse(gnu, usage, argc, argv, action, single_minus_longopt, false, min_abbr_len);
01500 }
01501 
01502 inline bool Parser::workhorse(bool gnu, const Descriptor usage[], int numargs, const char** args, Action& action,
01503                               bool single_minus_longopt, bool print_errors, int min_abbr_len)
01504 {
01505   // protect against NULL pointer
01506   if (args == 0)
01507     numargs = 0;
01508 
01509   int nonops = 0;
01510 
01511   while (numargs != 0 && *args != 0)
01512   {
01513     const char* param = *args; // param can be --long-option, -srto or non-option argument
01514 
01515     // in POSIX mode the first non-option argument terminates the option list
01516     // a lone minus character is a non-option argument
01517     if (param[0] != '-' || param[1] == 0)
01518     {
01519       if (gnu)
01520       {
01521         ++nonops;
01522         ++args;
01523         if (numargs > 0)
01524           --numargs;
01525         continue;
01526       }
01527       else
01528         break;
01529     }
01530 
01531     // -- terminates the option list. The -- itself is skipped.
01532     if (param[1] == '-' && param[2] == 0)
01533     {
01534       shift(args, nonops);
01535       ++args;
01536       if (numargs > 0)
01537         --numargs;
01538       break;
01539     }
01540 
01541     bool handle_short_options;
01542     const char* longopt_name;
01543     if (param[1] == '-') // if --long-option
01544     {
01545       handle_short_options = false;
01546       longopt_name = param + 2;
01547     }
01548     else
01549     {
01550       handle_short_options = true;
01551       longopt_name = param + 1; //for testing a potential -long-option
01552     }
01553 
01554     bool try_single_minus_longopt = single_minus_longopt;
01555     bool have_more_args = (numargs > 1 || numargs < 0); // is referencing argv[1] valid?
01556 
01557     do // loop over short options in group, for long options the body is executed only once
01558     {
01559       int idx;
01560 
01561       const char* optarg;
01562 
01563       /******************** long option **********************/
01564       if (handle_short_options == false || try_single_minus_longopt)
01565       {
01566         idx = 0;
01567         while (usage[idx].longopt != 0 && !streq(usage[idx].longopt, longopt_name))
01568           ++idx;
01569 
01570         if (usage[idx].longopt == 0 && min_abbr_len > 0) // if we should try to match abbreviated long options
01571         {
01572           int i1 = 0;
01573           while (usage[i1].longopt != 0 && !streqabbr(usage[i1].longopt, longopt_name, min_abbr_len))
01574             ++i1;
01575           if (usage[i1].longopt != 0)
01576           { // now test if the match is unambiguous by checking for another match
01577             int i2 = i1 + 1;
01578             while (usage[i2].longopt != 0 && !streqabbr(usage[i2].longopt, longopt_name, min_abbr_len))
01579               ++i2;
01580 
01581             if (usage[i2].longopt == 0) // if there was no second match it's unambiguous, so accept i1 as idx
01582               idx = i1;
01583           }
01584         }
01585 
01586         // if we found something, disable handle_short_options (only relevant if single_minus_longopt)
01587         if (usage[idx].longopt != 0)
01588           handle_short_options = false;
01589 
01590         try_single_minus_longopt = false; // prevent looking for longopt in the middle of shortopt group
01591 
01592         optarg = longopt_name;
01593         while (*optarg != 0 && *optarg != '=')
01594           ++optarg;
01595         if (*optarg == '=') // attached argument
01596           ++optarg;
01597         else
01598           // possibly detached argument
01599           optarg = (have_more_args ? args[1] : 0);
01600       }
01601 
01602       /************************ short option ***********************************/
01603       if (handle_short_options)
01604       {
01605         if (*++param == 0) // point at the 1st/next option character
01606           break; // end of short option group
01607 
01608         idx = 0;
01609         while (usage[idx].shortopt != 0 && !instr(*param, usage[idx].shortopt))
01610           ++idx;
01611 
01612         if (param[1] == 0) // if the potential argument is separate
01613           optarg = (have_more_args ? args[1] : 0);
01614         else
01615           // if the potential argument is attached
01616           optarg = param + 1;
01617       }
01618 
01619       const Descriptor* descriptor = &usage[idx];
01620 
01621       if (descriptor->shortopt == 0) /**************  unknown option ********************/
01622       {
01623         // look for dummy entry (shortopt == "" and longopt == "") to use as Descriptor for unknown options
01624         idx = 0;
01625         while (usage[idx].shortopt != 0 && (usage[idx].shortopt[0] != 0 || usage[idx].longopt[0] != 0))
01626           ++idx;
01627         descriptor = (usage[idx].shortopt == 0 ? 0 : &usage[idx]);
01628       }
01629 
01630       if (descriptor != 0)
01631       {
01632         Option option(descriptor, param, optarg);
01633         switch (descriptor->check_arg(option, print_errors))
01634         {
01635           case ARG_ILLEGAL:
01636             return false; // fatal
01637           case ARG_OK:
01638             // skip one element of the argument vector, if it's a separated argument
01639             if (optarg != 0 && have_more_args && optarg == args[1])
01640             {
01641               shift(args, nonops);
01642               if (numargs > 0)
01643                 --numargs;
01644               ++args;
01645             }
01646 
01647             // No further short options are possible after an argument
01648             handle_short_options = false;
01649 
01650             break;
01651           case ARG_IGNORE:
01652           case ARG_NONE:
01653             option.arg = 0;
01654             break;
01655         }
01656 
01657         if (!action.perform(option))
01658           return false;
01659       }
01660 
01661     } while (handle_short_options);
01662 
01663     shift(args, nonops);
01664     ++args;
01665     if (numargs > 0)
01666       --numargs;
01667 
01668   } // while
01669 
01670   if (numargs > 0 && *args == 0) // It's a bug in the caller if numargs is greater than the actual number
01671     numargs = 0; // of arguments, but as a service to the user we fix this if we spot it.
01672 
01673   if (numargs < 0) // if we don't know the number of remaining non-option arguments
01674   { // we need to count them
01675     numargs = 0;
01676     while (args[numargs] != 0)
01677       ++numargs;
01678   }
01679 
01680   return action.finished(numargs + nonops, args - nonops);
01681 }
01682 
01687 struct PrintUsageImplementation
01688 {
01693   struct IStringWriter
01694   {
01698     virtual void operator()(const char*, int)
01699     {
01700     }
01701   };
01702 
01708   template<typename Function>
01709   struct FunctionWriter: public IStringWriter
01710   {
01711     Function* write;
01712 
01713     virtual void operator()(const char* str, int size)
01714     {
01715       (*write)(str, size);
01716     }
01717 
01718     FunctionWriter(Function* w) :
01719         write(w)
01720     {
01721     }
01722   };
01723 
01729   template<typename OStream>
01730   struct OStreamWriter: public IStringWriter
01731   {
01732     OStream& ostream;
01733 
01734     virtual void operator()(const char* str, int size)
01735     {
01736       ostream.write(str, size);
01737     }
01738 
01739     OStreamWriter(OStream& o) :
01740         ostream(o)
01741     {
01742     }
01743   };
01744 
01750   template<typename Temporary>
01751   struct TemporaryWriter: public IStringWriter
01752   {
01753     const Temporary& userstream;
01754 
01755     virtual void operator()(const char* str, int size)
01756     {
01757       userstream.write(str, size);
01758     }
01759 
01760     TemporaryWriter(const Temporary& u) :
01761         userstream(u)
01762     {
01763     }
01764   };
01765 
01772   template<typename Syscall>
01773   struct SyscallWriter: public IStringWriter
01774   {
01775     Syscall* write;
01776     int fd;
01777 
01778     virtual void operator()(const char* str, int size)
01779     {
01780       (*write)(fd, str, size);
01781     }
01782 
01783     SyscallWriter(Syscall* w, int f) :
01784         write(w), fd(f)
01785     {
01786     }
01787   };
01788 
01793   template<typename Function, typename Stream>
01794   struct StreamWriter: public IStringWriter
01795   {
01796     Function* fwrite;
01797     Stream* stream;
01798 
01799     virtual void operator()(const char* str, int size)
01800     {
01801       (*fwrite)(str, size, 1, stream);
01802     }
01803 
01804     StreamWriter(Function* w, Stream* s) :
01805         fwrite(w), stream(s)
01806     {
01807     }
01808   };
01809 
01814   static void upmax(int& i1, int i2)
01815   {
01816     i1 = (i1 >= i2 ? i1 : i2);
01817   }
01818 
01830   static void indent(IStringWriter& write, int& x, int want_x)
01831   {
01832     int indent = want_x - x;
01833     if (indent < 0)
01834     {
01835       write("\n", 1);
01836       indent = want_x;
01837     }
01838 
01839     if (indent > 0)
01840     {
01841       char space = ' ';
01842       for (int i = 0; i < indent; ++i)
01843         write(&space, 1);
01844       x = want_x;
01845     }
01846   }
01847 
01866   static bool isWideChar(unsigned ch)
01867   {
01868     if (ch == 0x303F)
01869       return false;
01870 
01871     return ((0x1100 <= ch && ch <= 0x115F) || (0x2329 <= ch && ch <= 0x232A) || (0x2E80 <= ch && ch <= 0xA4C6)
01872         || (0xA960 <= ch && ch <= 0xA97C) || (0xAC00 <= ch && ch <= 0xD7FB) || (0xF900 <= ch && ch <= 0xFAFF)
01873         || (0xFE10 <= ch && ch <= 0xFE6B) || (0xFF01 <= ch && ch <= 0xFF60) || (0xFFE0 <= ch && ch <= 0xFFE6)
01874         || (0x1B000 <= ch));
01875   }
01876 
01913   class LinePartIterator
01914   {
01915     const Descriptor* tablestart; 
01916     const Descriptor* rowdesc; 
01917     const char* rowstart; 
01918     const char* ptr; 
01919     int col; 
01920     int len; 
01921     int screenlen; 
01922     int max_line_in_block; 
01923     int line_in_block; 
01924     int target_line_in_block; 
01925     bool hit_target_line; 
01926 
01931     void update_length()
01932     {
01933       screenlen = 0;
01934       for (len = 0; ptr[len] != 0 && ptr[len] != '\v' && ptr[len] != '\t' && ptr[len] != '\n'; ++len)
01935       {
01936         ++screenlen;
01937         unsigned ch = (unsigned char) ptr[len];
01938         if (ch > 0xC1) // everything <= 0xC1 (yes, even 0xC1 itself) is not a valid UTF-8 start byte
01939         {
01940           // int __builtin_clz (unsigned int x)
01941           // Returns the number of leading 0-bits in x, starting at the most significant bit
01942           unsigned mask = (unsigned) -1 >> __builtin_clz(ch ^ 0xff);
01943           ch = ch & mask; // mask out length bits, we don't verify their correctness
01944           while (((unsigned char) ptr[len + 1] ^ 0x80) <= 0x3F) // while next byte is continuation byte
01945           {
01946             ch = (ch << 6) ^ (unsigned char) ptr[len + 1] ^ 0x80; // add continuation to char code
01947             ++len;
01948           }
01949           // ch is the decoded unicode code point
01950           if (ch >= 0x1100 && isWideChar(ch)) // the test for 0x1100 is here to avoid the function call in the Latin case
01951             ++screenlen;
01952         }
01953       }
01954     }
01955 
01956   public:
01958     LinePartIterator(const Descriptor usage[]) :
01959         tablestart(usage), rowdesc(0), rowstart(0), ptr(0), col(-1), len(0), max_line_in_block(0), line_in_block(0),
01960         target_line_in_block(0), hit_target_line(true)
01961     {
01962     }
01963 
01969     bool nextTable()
01970     {
01971       // If this is NOT the first time nextTable() is called after the constructor,
01972       // then skip to the next table break (i.e. a Descriptor with help == 0)
01973       if (rowdesc != 0)
01974       {
01975         while (tablestart->help != 0 && tablestart->shortopt != 0)
01976           ++tablestart;
01977       }
01978 
01979       // Find the next table after the break (if any)
01980       while (tablestart->help == 0 && tablestart->shortopt != 0)
01981         ++tablestart;
01982 
01983       restartTable();
01984       return rowstart != 0;
01985     }
01986 
01990     void restartTable()
01991     {
01992       rowdesc = tablestart;
01993       rowstart = tablestart->help;
01994       ptr = 0;
01995     }
01996 
02002     bool nextRow()
02003     {
02004       if (ptr == 0)
02005       {
02006         restartRow();
02007         return rowstart != 0;
02008       }
02009 
02010       while (*ptr != 0 && *ptr != '\n')
02011         ++ptr;
02012 
02013       if (*ptr == 0)
02014       {
02015         if ((rowdesc + 1)->help == 0) // table break
02016           return false;
02017 
02018         ++rowdesc;
02019         rowstart = rowdesc->help;
02020       }
02021       else // if (*ptr == '\n')
02022       {
02023         rowstart = ptr + 1;
02024       }
02025 
02026       restartRow();
02027       return true;
02028     }
02029 
02033     void restartRow()
02034     {
02035       ptr = rowstart;
02036       col = -1;
02037       len = 0;
02038       screenlen = 0;
02039       max_line_in_block = 0;
02040       line_in_block = 0;
02041       target_line_in_block = 0;
02042       hit_target_line = true;
02043     }
02044 
// Advances to the next part of the current row and reports whether there is one.
//
// The row is scanned once per "target line": on each pass only the pieces that
// lie on line target_line_in_block of every '\t'-separated column are produced
// ('\v' starts a new line within a column). When the end of the row (0 or '\n')
// is reached and some column has further '\v'-separated lines, the scan starts
// over at rowstart for the next target line.
//
// Returns false if there is no current row (ptr == 0) or once all lines of the
// row have been produced. After a true return, column(), line(), length(),
// screenLength() and data() describe the part that was found.
// NOTE(review): update_length() is defined outside this excerpt; it presumably
// refreshes len and screenlen for the part starting at ptr -- confirm.
02052     bool next()
02053     {
02054       if (ptr == 0)
02055         return false;
02056 
            // Fresh row (see restartRow()): the first part starts right at ptr.
02057       if (col == -1)
02058       {
02059         col = 0;
02060         update_length();
02061         return true;
02062       }
02063 
            // Step over the part returned by the previous call.
02064       ptr += len;
02065       while (true)
02066       {
02067         switch (*ptr)
02068         {
              // '\v': next line within the same column; remember the deepest line seen.
02069           case '\v':
02070             upmax(max_line_in_block, ++line_in_block);
02071             ++ptr;
02072             break;
              // '\t': next column. ptr is left on the '\t' when an empty part is inserted,
              // so the following call takes the regular path below.
02073           case '\t':
02074             if (!hit_target_line) // if previous column did not have the targetline
02075             { // then "insert" a 0-length part
02076               update_length();
02077               hit_target_line = true;
02078               return true;
02079             }
02080 
02081             hit_target_line = false;
02082             line_in_block = 0;
02083             ++col;
02084             ++ptr;
02085             break;
              // End of the row: either finished, or rescan the row for the next target line.
02086           case 0:
02087           case '\n':
02088             if (!hit_target_line) // if previous column did not have the targetline
02089             { // then "insert" a 0-length part
02090               update_length();
02091               hit_target_line = true;
02092               return true;
02093             }
02094 
                // No column has a line beyond the ones already produced => row is done.
02095             if (++target_line_in_block > max_line_in_block)
02096             {
02097               update_length();
02098               return false;
02099             }
02100 
02101             hit_target_line = false;
02102             line_in_block = 0;
02103             col = 0;
02104             ptr = rowstart;
02105             continue;
              // Ordinary character: keep scanning (skips the check after the switch).
02106           default:
02107             ++ptr;
02108             continue;
02109         } // switch
02110 
            // Only reached after a '\v' or '\t': ptr is now at the start of a piece;
            // report it if it lies on the line currently being collected.
02111         if (line_in_block == target_line_in_block)
02112         {
02113           update_length();
02114           hit_target_line = true;
02115           return true;
02116         }
02117       } // while
02118     }
02119 
02124     int column()
02125     {
02126       return col;
02127     }
02128 
02133     int line()
02134     {
02135       return target_line_in_block; // NOT line_in_block !!! It would be wrong if !hit_target_line
02136     }
02137 
02141     int length()
02142     {
02143       return len;
02144     }
02145 
02150     int screenLength()
02151     {
02152       return screenlen;
02153     }
02154 
02158     const char* data()
02159     {
02160       return ptr;
02161     }
02162   };
02163 
02188   class LineWrapper
02189   {
02190     static const int bufmask = 15; 
02191 
02194     int lenbuf[bufmask + 1];
02198     const char* datbuf[bufmask + 1];
02205     int x;
02209     int width;
02210     int head; 
02211     int tail; 
02212 
02220     bool wrote_something;
02221 
02222     bool buf_empty()
02223     {
02224       return ((tail + 1) & bufmask) == head;
02225     }
02226 
02227     bool buf_full()
02228     {
02229       return tail == head;
02230     }
02231 
02232     void buf_store(const char* data, int len)
02233     {
02234       lenbuf[head] = len;
02235       datbuf[head] = data;
02236       head = (head + 1) & bufmask;
02237     }
02238 
02240     void buf_next()
02241     {
02242       tail = (tail + 1) & bufmask;
02243     }
02244 
02249     void output(IStringWriter& write, const char* data, int len)
02250     {
02251       if (buf_full())
02252         write_one_line(write);
02253 
02254       buf_store(data, len);
02255     }
02256 
02260     void write_one_line(IStringWriter& write)
02261     {
02262       if (wrote_something) // if we already wrote something, we need to start a new line
02263       {
02264         write("\n", 1);
02265         int _ = 0;
02266         indent(write, _, x);
02267       }
02268 
02269       if (!buf_empty())
02270       {
02271         buf_next();
02272         write(datbuf[tail], lenbuf[tail]);
02273       }
02274 
02275       wrote_something = true;
02276     }
02277   public:
02278 
02284     void flush(IStringWriter& write)
02285     {
02286       if (buf_empty())
02287         return;
02288       int _ = 0;
02289       indent(write, _, x);
02290       wrote_something = false;
02291       while (!buf_empty())
02292         write_one_line(write);
02293       write("\n", 1);
02294     }
02295 
02314     void process(IStringWriter& write, const char* data, int len)
02315     {
02316       wrote_something = false;
02317 
02318       while (len > 0)
02319       {
02320         if (len <= width) // quick test that works because utf8width <= len (all wide chars have at least 2 bytes)
02321         {
02322           output(write, data, len);
02323           len = 0;
02324         }
02325         else // if (len > width)  it's possible (but not guaranteed) that utf8len > width
02326         {
02327           int utf8width = 0;
02328           int maxi = 0;
02329           while (maxi < len && utf8width < width)
02330           {
02331             int charbytes = 1;
02332             unsigned ch = (unsigned char) data[maxi];
02333             if (ch > 0xC1) // everything <= 0xC1 (yes, even 0xC1 itself) is not a valid UTF-8 start byte
02334             {
02335               // int __builtin_clz (unsigned int x)
02336               // Returns the number of leading 0-bits in x, starting at the most significant bit
02337               unsigned mask = (unsigned) -1 >> __builtin_clz(ch ^ 0xff);
02338               ch = ch & mask; // mask out length bits, we don't verify their correctness
02339               while ((maxi + charbytes < len) && //
02340                   (((unsigned char) data[maxi + charbytes] ^ 0x80) <= 0x3F)) // while next byte is continuation byte
02341               {
02342                 ch = (ch << 6) ^ (unsigned char) data[maxi + charbytes] ^ 0x80; // add continuation to char code
02343                 ++charbytes;
02344               }
02345               // ch is the decoded unicode code point
02346               if (ch >= 0x1100 && isWideChar(ch)) // the test for 0x1100 is here to avoid the function call in the Latin case
02347               {
02348                 if (utf8width + 2 > width)
02349                   break;
02350                 ++utf8width;
02351               }
02352             }
02353             ++utf8width;
02354             maxi += charbytes;
02355           }
02356 
02357           // data[maxi-1] is the last byte of the UTF-8 sequence of the last character that fits
02358           // onto the 1st line. If maxi == len, all characters fit on the line.
02359 
02360           if (maxi == len)
02361           {
02362             output(write, data, len);
02363             len = 0;
02364           }
02365           else // if (maxi < len)  at least 1 character (data[maxi] that is) doesn't fit on the line
02366           {
02367             int i;
02368             for (i = maxi; i >= 0; --i)
02369               if (data[i] == ' ')
02370                 break;
02371 
02372             if (i >= 0)
02373             {
02374               output(write, data, i);
02375               data += i + 1;
02376               len -= i + 1;
02377             }
02378             else // did not find a space to split at => split before data[maxi]
02379             { // data[maxi] is always the beginning of a character, never a continuation byte
02380               output(write, data, maxi);
02381               data += maxi;
02382               len -= maxi;
02383             }
02384           }
02385         }
02386       }
02387       if (!wrote_something) // if we didn't already write something to make space in the buffer
02388         write_one_line(write); // write at most one line of actual output
02389     }
02390 
02397     LineWrapper(int x1, int x2) :
02398         x(x1), width(x2 - x1), head(0), tail(bufmask)
02399     {
02400       if (width < 2) // because of wide characters we need at least width 2 or the code breaks
02401         width = 2;
02402     }
02403   };
02404 
// Formats the usage text described by usage[] and writes it through `write`,
// one table at a time (tables are iterated with LinePartIterator::nextTable()).
//
// For every table the column widths are determined first, then all rows are
// printed with the columns aligned at common tab stops. The last column (and
// rows that consist of a single part without '\t' / '\v', so-called
// interjections) are line-wrapped with a LineWrapper.
//
// write   - receives all output.
// usage   - the descriptors whose help texts make up the tables.
// width   - total output width in screen columns; values < 1 are replaced by
//           80, values > 10000 are reduced to 10000.
// last_column_min_percent - percentage of width that should remain for the
//           last column next to the other columns. If less remains and the
//           last column needs more than what remains, the last column is
//           printed on lines of its own.
// last_column_own_line_max_percent - percentage of width the last column may
//           use when it is printed on lines of its own.
02410   static void printUsage(IStringWriter& write, const Descriptor usage[], int width = 80, //
02411                          int last_column_min_percent = 50, int last_column_own_line_max_percent = 75)
02412   {
02413     if (width < 1) // protect against nonsense values
02414       width = 80;
02415 
02416     if (width > 10000) // protect against overflow in the following computation
02417       width = 10000;
02418 
          // Convert the percentages into screen columns (rounded to nearest).
02419     int last_column_min_width = ((width * last_column_min_percent) + 50) / 100;
02420     int last_column_own_line_max_width = ((width * last_column_own_line_max_percent) + 50) / 100;
02421     if (last_column_own_line_max_width == 0)
02422       last_column_own_line_max_width = 1;
02423 
02424     LinePartIterator part(usage);
02425     while (part.nextTable())
02426     {
02427 
02428       /***************** Determine column widths *******************************/
02429 
02430       const int maxcolumns = 8; // 8 columns are enough for everyone
02431       int col_width[maxcolumns];
02432       int lastcolumn;
02433       int leftwidth;
02434       int overlong_column_threshold = 10000;
            // Each pass measures the table while ignoring parts that are at least
            // overlong_column_threshold wide; the threshold shrinks from pass to pass
            // until the columns left of the last one fit into width.
02435       do
02436       {
02437         lastcolumn = 0;
02438         for (int i = 0; i < maxcolumns; ++i)
02439           col_width[i] = 0;
02440 
02441         part.restartTable();
02442         while (part.nextRow())
02443         {
02444           while (part.next())
02445           {
02446             if (part.column() < maxcolumns)
02447             {
02448               upmax(lastcolumn, part.column());
02449               if (part.screenLength() < overlong_column_threshold)
02450                 // We don't let rows that don't use table separators (\t or \v) influence
02451                 // the width of column 0. This allows the user to interject section headers
02452                 // or explanatory paragraphs that do not participate in the table layout.
02453                 if (part.column() > 0 || part.line() > 0 || part.data()[part.length()] == '\t'
02454                     || part.data()[part.length()] == '\v')
02455                   upmax(col_width[part.column()], part.screenLength());
02456             }
02457           }
02458         }
02459 
02460         /*
02461          * If the last column doesn't fit on the same
02462          * line as the other columns, we can fix that by starting it on its own line.
02463          * However we can't do this for any of the columns 0..lastcolumn-1.
02464          * If their sum exceeds the maximum width we try to fix this by iteratively
02465          * ignoring the widest line parts in the width determination until
02466          * we arrive at a series of column widths that fit into one line.
02467          * The result is a layout where everything is nicely formatted
02468          * except for a few overlong fragments.
02469          * */
02470 
02471         leftwidth = 0;
02472         overlong_column_threshold = 0;
02473         for (int i = 0; i < lastcolumn; ++i)
02474         {
02475           leftwidth += col_width[i];
02476           upmax(overlong_column_threshold, col_width[i]);
02477         }
02478 
02479       } while (leftwidth > width);
02480 
02481       /**************** Determine tab stops and last column handling **********************/
02482 
            // tabstop[i] is the screen column at which column i starts.
02483       int tabstop[maxcolumns];
02484       tabstop[0] = 0;
02485       for (int i = 1; i < maxcolumns; ++i)
02486         tabstop[i] = tabstop[i - 1] + col_width[i - 1];
02487 
            // rightwidth is the width that is available to the last column.
02488       int rightwidth = width - tabstop[lastcolumn];
02489       bool print_last_column_on_own_line = false;
02490       if (rightwidth < last_column_min_width &&  // if we don't have the minimum requested width for the last column
02491             ( col_width[lastcolumn] == 0 ||      // and all last columns are > overlong_column_threshold
02492               rightwidth < col_width[lastcolumn] // or there is at least one last column that requires more than the space available
02493             )
02494           )
02495       {
02496         print_last_column_on_own_line = true;
02497         rightwidth = last_column_own_line_max_width;
02498       }
02499 
02500       // If lastcolumn == 0 we must disable print_last_column_on_own_line because
02501       // otherwise 2 copies of the last (and only) column would be output.
02502       // Actually this is just defensive programming. It is currently not
02503       // possible that lastcolumn==0 and print_last_column_on_own_line==true
02504       // at the same time, because lastcolumn==0 => tabstop[lastcolumn] == 0 =>
02505       // rightwidth==width => rightwidth>=last_column_min_width  (unless someone passes
02506       // a nonsensical value >100 for last_column_min_percent) => the above if condition
02507       // is false => print_last_column_on_own_line==false
02508       if (lastcolumn == 0)
02509         print_last_column_on_own_line = false;
02510 
            // The last column occupies the rightmost rightwidth screen columns;
            // interjections may use the full width.
02511       LineWrapper lastColumnLineWrapper(width - rightwidth, width);
02512       LineWrapper interjectionLineWrapper(0, width);
02513 
02514       part.restartTable();
02515 
02516       /***************** Print out all rows of the table *************************************/
02517 
02518       while (part.nextRow())
02519       {
              // x is the current output column on the screen line; -1 means that
              // nothing has been written for this row yet.
02520         int x = -1;
02521         while (part.next())
02522         {
02523           if (part.column() > lastcolumn)
02524             continue; // drop excess columns (can happen if lastcolumn == maxcolumns-1)
02525 
                // Column 0 starts a new screen line (except for the very first line of the row).
02526           if (part.column() == 0)
02527           {
02528             if (x >= 0)
02529               write("\n", 1);
02530             x = 0;
02531           }
02532 
02533           indent(write, x, tabstop[part.column()]);
02534 
02535           if ((part.column() < lastcolumn)
02536               && (part.column() > 0 || part.line() > 0 || part.data()[part.length()] == '\t'
02537                   || part.data()[part.length()] == '\v'))
02538           {
02539             write(part.data(), part.length());
02540             x += part.screenLength();
02541           }
02542           else // either part.column() == lastcolumn or we are in the special case of
02543                // an interjection that doesn't contain \v or \t
02544           {
02545             // NOTE: This code block is not necessarily executed for
02546             // each line, because some rows may have fewer columns.
02547 
02548             LineWrapper& lineWrapper = (part.column() == 0) ? interjectionLineWrapper : lastColumnLineWrapper;
02549 
                  // If the last column goes on lines of its own, it is output after
                  // the other columns of the row (see below) and skipped here.
02550             if (!print_last_column_on_own_line || part.column() != lastcolumn)
02551               lineWrapper.process(write, part.data(), part.length());
02552           }
02553         } // while
02554 
              // Second pass over the row: output only the parts of the last column,
              // each starting on a new screen line.
02555         if (print_last_column_on_own_line)
02556         {
02557           part.restartRow();
02558           while (part.next())
02559           {
02560             if (part.column() == lastcolumn)
02561             {
02562               write("\n", 1);
02563               int _ = 0;
02564               indent(write, _, width - rightwidth);
02565               lastColumnLineWrapper.process(write, part.data(), part.length());
02566             }
02567           }
02568         }
02569 
              // End the row and output whatever the line wrappers still have queued.
02570         write("\n", 1);
02571         lastColumnLineWrapper.flush(write);
02572         interjectionLineWrapper.flush(write);
02573       }
02574     }
02575   }
02576 
02577 }
02578 ;
02579 
02777 template<typename OStream>
02778 void printUsage(OStream& prn, const Descriptor usage[], int width = 80, int last_column_min_percent = 50,
02779                 int last_column_own_line_max_percent = 75)
02780 {
02781   PrintUsageImplementation::OStreamWriter<OStream> write(prn);
02782   PrintUsageImplementation::printUsage(write, usage, width, last_column_min_percent, last_column_own_line_max_percent);
02783 }
02784 
02785 template<typename Function>
02786 void printUsage(Function* prn, const Descriptor usage[], int width = 80, int last_column_min_percent = 50,
02787                 int last_column_own_line_max_percent = 75)
02788 {
02789   PrintUsageImplementation::FunctionWriter<Function> write(prn);
02790   PrintUsageImplementation::printUsage(write, usage, width, last_column_min_percent, last_column_own_line_max_percent);
02791 }
02792 
02793 template<typename Temporary>
02794 void printUsage(const Temporary& prn, const Descriptor usage[], int width = 80, int last_column_min_percent = 50,
02795                 int last_column_own_line_max_percent = 75)
02796 {
02797   PrintUsageImplementation::TemporaryWriter<Temporary> write(prn);
02798   PrintUsageImplementation::printUsage(write, usage, width, last_column_min_percent, last_column_own_line_max_percent);
02799 }
02800 
02801 template<typename Syscall>
02802 void printUsage(Syscall* prn, int fd, const Descriptor usage[], int width = 80, int last_column_min_percent = 50,
02803                 int last_column_own_line_max_percent = 75)
02804 {
02805   PrintUsageImplementation::SyscallWriter<Syscall> write(prn, fd);
02806   PrintUsageImplementation::printUsage(write, usage, width, last_column_min_percent, last_column_own_line_max_percent);
02807 }
02808 
02809 template<typename Function, typename Stream>
02810 void printUsage(Function* prn, Stream* stream, const Descriptor usage[], int width = 80, int last_column_min_percent =
02811                     50,
02812                 int last_column_own_line_max_percent = 75)
02813 {
02814   PrintUsageImplementation::StreamWriter<Function, Stream> write(prn, stream);
02815   PrintUsageImplementation::printUsage(write, usage, width, last_column_min_percent, last_column_own_line_max_percent);
02816 }
02817 
02818 }
02819 // namespace option
02820 
02821 #endif /* OPTIONPARSER_H_ */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator