/* sort - sort lines of text (with all kinds of options).
  Copyright (C) 1988, 1991-2011 Free Software Foundation, Inc.
  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.
  Written December 1988 by Mike Haertel.
  The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
  or (US mail) as Mike Haertel c/o Free Software Foundation.
  Ørn E. Hansen added NLS support in 1997.  */
#include <config.h>
#include <getopt.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <signal.h>
#include "system.h"
#include "argmatch.h"
#include "error.h"
#include "fadvise.h"
#include "filevercmp.h"
#include "hard-locale.h"
#include "hash.h"
#include "heap.h"
#include "ignore-value.h"
#include "md5.h"
#include "mbswidth.h"
#include "nproc.h"
#include "physmem.h"
#include "posixver.h"
#include "quote.h"
#include "quotearg.h"
#include "randread.h"
#include "readtokens0.h"
#include "stdio--.h"
#include "stdlib--.h"
#include "strnumcmp.h"
#include "xmemcoll.h"
#include "xnanosleep.h"
#include "xstrtol.h"
#if HAVE_SYS_RESOURCE_H
# include <sys/resource.h>
#endif
#ifndef RLIMIT_DATA
/* RLIMIT_DATA is not defined, so supply a minimal stand-in for the
  resource-limit interface: a struct rlimit carrying only rlim_cur,
  and a getrlimit macro that always evaluates to -1.  */
struct rlimit { size_t rlim_cur; };
# define getrlimit(Resource, Rlp) (-1)
#endif
/* The official name of this program (e.g., no `g' prefix).  */
#define PROGRAM_NAME "sort"
#define AUTHORS \
  proper_name ("Mike Haertel"), \
  proper_name ("Paul Eggert")
#if HAVE_LANGINFO_CODESET
# include <langinfo.h>
#endif
/* Use SA_NOCLDSTOP as a proxy for whether the sigaction machinery is
  present.  */
#ifndef SA_NOCLDSTOP
# define SA_NOCLDSTOP 0
/* No sigprocmask.  Always 'return' zero. */
# define sigprocmask(How, Set, Oset) (0)
# define sigset_t int
# if ! HAVE_SIGINTERRUPT
#  define siginterrupt(sig, flag) /* empty */
# endif
#endif
#if !defined OPEN_MAX && defined NR_OPEN
# define OPEN_MAX NR_OPEN
#endif
#if !defined OPEN_MAX
# define OPEN_MAX 20
#endif
#define UCHAR_LIM (UCHAR_MAX + 1)
#if HAVE_C99_STRTOLD
# define long_double long double
#else
# define long_double double
# undef strtold
# define strtold strtod
#endif
#ifndef DEFAULT_TMPDIR
# define DEFAULT_TMPDIR "/tmp"
#endif
/* Maximum number of lines to merge every time a NODE is taken from
  the merge queue.  Node is at LEVEL in the binary merge tree,
  and is responsible for merging TOTAL lines. */
#define MAX_MERGE(total, level) (((total) >> (2 * ((level) + 1))) + 1)
/* Heuristic value for the number of lines for which it is worth
  creating a subthread, during an internal merge sort, on a machine
  that has processors galore.  Currently this number is just a guess.
  This value must be at least 4.  We don't know of any machine where
  this number has any practical effect.  */
enum
  {
    SUBTHREAD_LINES_HEURISTIC = 4
  };
/* Thread count beyond which additional threads bring only
  diminishing performance gains.  */
enum
  {
    DEFAULT_MAX_THREADS = 8
  };
/* Exit statuses.  POSIX reserves status 1 for the case where sort is
  invoked with -c and the input is not properly sorted, and requires
  any other irregular exit to use a status code greater than 1.  */
enum
  {
    SORT_OUT_OF_ORDER = 1,      /* -c found input that is not sorted.  */
    SORT_FAILURE = 2            /* Any other irregular exit.  */
  };
/* Limits on how many times a failed fork call is retried.  Each
  retry doubles in duration.  */
enum
  {
    /* Attempts at forking a compression process.  Compressing temp
      files is not required -- it only reduces disk access -- so
      this number can be small.  */
    MAX_FORK_TRIES_COMPRESS = 4,

    /* Attempts at forking a decompression process.  Without a
      decompression process we can't sort, so this number should be
      big.  */
    MAX_FORK_TRIES_DECOMPRESS = 9
  };
/* Distinguished levels in the binary merge tree.  */
enum
  {
    MERGE_END = 0,      /* End-of-merge node, one level above the root.  */
    MERGE_ROOT = 1      /* Root node of the merge tree.  */
  };
/* The representation of the decimal point in the current locale.  */
static int decimal_point;
/* Thousands separator; if -1, then there isn't one.  */
static int thousands_sep;
/* Nonzero if the corresponding locales are hard.  */
static bool hard_LC_COLLATE;
#if HAVE_NL_LANGINFO
static bool hard_LC_TIME;
#endif
#define NONZERO(x) ((x) != 0)
/* Kinds of blanks that '-b' can be asked to skip in various
  options.  */
enum blanktype
{
  bl_start,
  bl_end,
  bl_both
};
/* The character marking end of line. Default to \n. */
static char eolchar = '\n';
/* A line held in core, represented as a counted string.  */
struct line
{
  /* Text of the line.  */
  char *text;
  /* Length, including the final newline.  */
  size_t length;
  /* Start of the first key.  */
  char *keybeg;
  /* Limit of the first key.  */
  char *keylim;
};
/* Input buffers. */
struct buffer
{
  /* Dynamically allocated buffer, partitioned into 3 regions:
    - input data;
    - unused area;
    - an array of lines, in reverse order.  */
  char *buf;
  /* Number of bytes used for input data.  */
  size_t used;
  /* Number of lines in the line array.  */
  size_t nlines;
  /* Number of bytes allocated.  */
  size_t alloc;
  /* Number of bytes left from previous reads.  */
  size_t left;
  /* Number of bytes to reserve for each line.  */
  size_t line_bytes;
  /* An EOF has been read.  */
  bool eof;
};
/* Sort key.  Each keyfield describes one key specification; NEXT
  links to the next keyfield to try.  */
struct keyfield
{
  size_t sword;                 /* Zero-origin 'word' to start at. */
  size_t schar;                 /* Additional characters to skip. */
  size_t eword;                 /* Zero-origin last 'word' of key. */
  size_t echar;                 /* Additional characters in field. */
  bool const *ignore;           /* Boolean array of characters to ignore. */
  char const *translate;        /* Translation applied to characters. */
  bool skipsblanks;             /* Skip leading blanks when finding start.  */
  bool skipeblanks;             /* Skip leading blanks when finding end.  */
  bool numeric;                 /* Flag for numeric comparison.  Handle
                                  strings of digits with optional decimal
                                  point, but no exponential notation. */
  bool random;                  /* Sort by random hash of key.  */
  bool general_numeric;         /* Flag for general, numeric comparison.
                                  Handle numbers in exponential notation. */
  bool human_numeric;           /* Flag for sorting by human readable
                                  units with either SI xor IEC prefixes. */
  bool month;                   /* Flag for comparison by month name. */
  bool reverse;                 /* Reverse the sense of comparison. */
  bool version;                 /* Sort by version number. */
  bool obsolete_used;           /* Obsolescent key option format is used. */
  struct keyfield *next;        /* Next keyfield to try. */
};
/* A month name paired with its integer value.  */
struct month
{
  char const *name;     /* Month name.  */
  int val;              /* Integer the name maps to.  */
};
/* Binary merge tree node. */
struct merge_node
{
  /* Lines to merge from LO child node. */
  struct line *lo;
  /* Lines to merge from HI child node. */
  struct line *hi;
  /* End of available lines from LO. */
  struct line *end_lo;
  /* End of available lines from HI. */
  struct line *end_hi;
  /* Pointer to destination of merge. */
  struct line **dest;
  /* Total lines remaining from LO. */
  size_t nlo;
  /* Total lines remaining from HI. */
  size_t nhi;
  /* Parent node. */
  struct merge_node *parent;
  /* LO child node. */
  struct merge_node *lo_child;
  /* HI child node. */
  struct merge_node *hi_child;
  /* Level in merge tree. */
  unsigned int level;
  /* Node is already in heap. */
  bool queued;
  /* Lock for node operations. */
  pthread_mutex_t lock;
};
/* Priority queue of merge nodes. */
struct merge_node_queue
{
  /* Priority queue of merge tree nodes. */
  struct heap *priority_queue;
  /* Lock for queue operations. */
  pthread_mutex_t mutex;
  /* Conditional wait for empty queue to populate when popping. */
  pthread_cond_t cond;
};
/* FIXME: None of these tables work with multibyte character sets.
  Also, there are many other bugs when handling multibyte characters.
  One way to fix this is to rewrite `sort' to use wide characters
  internally, but doing this with good performance is a bit
  tricky.  */
/* Each table below has UCHAR_LIM (UCHAR_MAX + 1) entries, i.e. one
  entry per possible unsigned char value.  */
/* Table of blanks.  */
static bool blanks[UCHAR_LIM];
/* Table of non-printing characters. */
static bool nonprinting[UCHAR_LIM];
/* Table of non-dictionary characters (not letters, digits, or blanks). */
static bool nondictionary[UCHAR_LIM];
/* Translation table folding lower case to upper.  */
static char fold_toupper[UCHAR_LIM];
#define MONTHS_PER_YEAR 12
/* Month names paired with their integer values.  The entries are
  kept in alphabetical order by name, which allows binary search.  */
static struct month monthtab[] =
{
  { "APR",  4 }, { "AUG",  8 }, { "DEC", 12 },
  { "FEB",  2 }, { "JAN",  1 }, { "JUL",  7 },
  { "JUN",  6 }, { "MAR",  3 }, { "MAY",  5 },
  { "NOV", 11 }, { "OCT", 10 }, { "SEP",  9 }
};
/* During the merge phase, the number of files to merge at once. */
#define NMERGE_DEFAULT 16
/* Minimum size for a merge or check buffer: two bytes plus the size
  of one struct line.  */
#define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
/* Minimum sort size; the code might not work with smaller sizes.
  This expands to an expression that reads the file-scope variable
  `nmerge', so its value follows the current value of that variable.  */
#define MIN_SORT_SIZE (nmerge * MIN_MERGE_BUFFER_SIZE)
/* The number of bytes needed for a merge or check buffer, which can
  function relatively efficiently even if it holds only one line.  If
  a longer line is seen, this value is increased.  The initial value
  is the larger of MIN_MERGE_BUFFER_SIZE and 256 * 1024.  */
static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
/* The approximate maximum number of bytes of main memory to use, as
  specified by the user.  Zero if the user has not specified a size.  */
static size_t sort_size;
/* The guessed size for non-regular files.  */
#define INPUT_FILE_SIZE_GUESS (1024 * 1024)
/* Array of directory names in which any temporary files are to be created. */
static char const **temp_dirs;
/* Number of temporary directory names used.  */
static size_t temp_dir_count;
/* Number of allocated slots in temp_dirs.  */
static size_t temp_dir_alloc;
/* Flag to reverse the order of all comparisons. */
static bool reverse;
/* Flag for stable sort.  This turns off the last ditch bytewise
  comparison of lines, and instead leaves lines in the same order
  they were read if all keys compare equal.  */
static bool stable;
/* If TAB has this value, blanks separate fields.  */
enum { TAB_DEFAULT = CHAR_MAX + 1 };
/* Tab character separating fields.  If TAB_DEFAULT, then fields are
  separated by the empty string between a non-blank character and a blank
  character. */
static int tab = TAB_DEFAULT;
/* Flag to remove consecutive duplicate lines from the output.
  Only the last of a sequence of equal lines will be output. */
static bool unique;
/* Nonzero if any of the input files are the standard input. */
static bool have_read_stdin;
/* List of key field comparisons to be tried.  */
static struct keyfield *keylist;
/* Program used to (de)compress temp files.  Must accept -d.  */
static char const *compress_program;
/* Annotate the output with extra info to aid the user.  */
static bool debug;
/* Maximum number of files to merge in one go.  If more than this
  number are present, temp files will be used. */
static unsigned int nmerge = NMERGE_DEFAULT;
/* Report MESSAGE for FILE through error (), passing along the
  current errno value, then exit with status SORT_FAILURE.  A null
  FILE represents standard output.  Any cleanup is presumably left
  to handlers that run at exit; none is performed directly here.  */
static void die (char const *, char const *) ATTRIBUTE_NORETURN;
static void
die (char const *message, char const *file)
{
  /* Capture errno before anything else has a chance to touch it.  */
  int saved_errno = errno;
  char const *target = file;

  if (! target)
    target = _("standard output");

  error (0, saved_errno, "%s: %s", message, target);
  exit (SORT_FAILURE);
}
void
usage (int status)
{
  if (status != EXIT_SUCCESS)
    fprintf (stderr, _("Try `%s --help' for more information.\n"),
            program_name);
  else
    {
      printf (_("\
Usage: %s [OPTION]... [FILE]...\n\
  or:  %s [OPTION]... --files0-from=F\n\
"),
              program_name, program_name);
      fputs (_("\
Write sorted concatenation of all FILE(s) to standard output.\n\
\n\
"), stdout);
      fputs (_("\
Mandatory arguments to long options are mandatory for short options too.\n\
"), stdout);
      fputs (_("\
Ordering options:\n\
\n\
"), stdout);
      fputs (_("\
  -b, --ignore-leading-blanks  ignore leading blanks\n\
  -d, --dictionary-order      consider only blanks and alphanumeric characters\
\n\
  -f, --ignore-case          fold lower case to upper case characters\n\
"), stdout);
      fputs (_("\
  -g, --general-numeric-sort  compare according to general numerical value\n\
  -i, --ignore-nonprinting    consider only printable characters\n\
  -M, --month-sort            compare (unknown) < `JAN' < ... < `DEC'\n\
"), stdout);
      fputs (_("\
  -h, --human-numeric-sort    compare human readable numbers (e.g., 2K 1G)\n\
"), stdout);
      fputs (_("\
  -n, --numeric-sort          compare according to string numerical value\n\
  -R, --random-sort          sort by random hash of keys\n\
      --random-source=FILE    get random bytes from FILE\n\
  -r, --reverse              reverse the result of comparisons\n\
"), stdout);
      fputs (_("\
      --sort=WORD            sort according to WORD:\n\
                                general-numeric -g, human-numeric -h, month -M,\
\n\
                                numeric -n, random -R, version -V\n\
  -V, --version-sort          natural sort of (version) numbers within text\n\
\n\
"), stdout);
      fputs (_("\
Other options:\n\
\n\
"), stdout);
      fputs (_("\
      --batch-size=NMERGE  merge at most NMERGE inputs at once;\n\
                            for more use temp files\n\
"), stdout);
      fputs (_("\
  -c, --check, --check=diagnose-first  check for sorted input; do not sort\n\
  -C, --check=quiet, --check=silent  like -c, but do not report first bad line\
\n\
      --compress-program=PROG  compress temporaries with PROG;\n\
                              decompress them with PROG -d\n\
"), stdout);
      fputs (_("\
      --debug              annotate the part of the line used to sort,\n\
                              and warn about questionable usage to stderr\n\
      --files0-from=F      read input from the files specified by\n\
                            NUL-terminated names in file F;\n\
                            If F is - then read names from standard input\n\
"), stdout);
      fputs (_("\
  -k, --key=POS1[,POS2]    start a key at