Current File : /home/mmdealscpanel/yummmdeals.com/pcre802.tar
usr/include/pcrecpparg.h000064400000015177150403561400011310 0ustar00// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Sanjay Ghemawat

#ifndef _PCRECPPARG_H
#define _PCRECPPARG_H

#include <stdlib.h>    // for NULL
#include <string>

#include <pcre.h>

namespace pcrecpp {

class StringPiece;

// Hex/Octal/Binary?

// Special class for parsing into objects that define a ParseFrom() method
// Adapter used by Arg for destination types that are not built in: any
// class T providing "bool ParseFrom(const char*, int)" can be parsed into.
template <class T>
class _RE_MatchObject {
 public:
  // Hands the n bytes starting at str to dest->ParseFrom() and returns its
  // result.  A NULL dest means "nothing to store" and counts as success.
  static inline bool Parse(const char* str, int n, void* dest) {
    T* const target = static_cast<T*>(dest);
    return (target == NULL) || target->ParseFrom(str, n);
  }
};

// Arg pairs a type-erased destination pointer (arg_) with the parser
// function (parser_) that knows how to convert matched text into the
// destination's type.  Parse() simply forwards the text and the stored
// destination to that parser.  The many constructors below exist so that a
// plain "T*" converts implicitly into an Arg carrying the right parser.
class PCRECPP_EXP_DEFN Arg {
 public:
  // Empty constructor so we can declare arrays of Arg
  // (defined after the class: NULL destination, parse_null parser).
  Arg();

  // Constructor specially designed for NULL arguments
  // (defined after the class: keeps the pointer, uses parse_null).
  Arg(void*);

  // Signature shared by every parser: "str" points at the text, "n" is its
  // length, and "dest" is the type-erased destination.  The bool result is
  // what Arg::Parse() returns to its caller.
  typedef bool (*Parser)(const char* str, int n, void* dest);

// Type-specific parsers
// For each supported destination type this macro emits two constructors:
// one that selects the default parser "name" for that type, and one that
// lets the caller supply a different Parser for the same pointer type.
#define PCRE_MAKE_PARSER(type,name)                             \
  Arg(type* p) : arg_(p), parser_(name) { }                     \
  Arg(type* p, Parser parser) : arg_(p), parser_(parser) { }


  PCRE_MAKE_PARSER(char,               parse_char);
  PCRE_MAKE_PARSER(unsigned char,      parse_uchar);
  PCRE_MAKE_PARSER(short,              parse_short);
  PCRE_MAKE_PARSER(unsigned short,     parse_ushort);
  PCRE_MAKE_PARSER(int,                parse_int);
  PCRE_MAKE_PARSER(unsigned int,       parse_uint);
  PCRE_MAKE_PARSER(long,               parse_long);
  PCRE_MAKE_PARSER(unsigned long,      parse_ulong);
  // NOTE(review): the constant "#if 1" guards look like build-time
  // substitutions for "long long" availability -- confirm against the
  // template this header was generated from.
#if 1
  PCRE_MAKE_PARSER(long long,          parse_longlong);
#endif
#if 1
  PCRE_MAKE_PARSER(unsigned long long, parse_ulonglong);
#endif
  PCRE_MAKE_PARSER(float,              parse_float);
  PCRE_MAKE_PARSER(double,             parse_double);
  PCRE_MAKE_PARSER(std::string,        parse_string);
  PCRE_MAKE_PARSER(StringPiece,        parse_stringpiece);

#undef PCRE_MAKE_PARSER

  // Generic constructor
  // NOTE(review): only declared here; no definition is visible in this
  // header -- confirm where (or whether) it is defined before relying on it.
  template <class T> Arg(T*, Parser parser);
  // Generic constructor template
  // Used for any pointer type without a dedicated constructor above; parsing
  // is delegated to T::ParseFrom() through _RE_MatchObject<T>::Parse.
  template <class T> Arg(T* p)
    : arg_(p), parser_(_RE_MatchObject<T>::Parse) {
  }

  // Parse the data
  // Calls the stored parser with (str, n, arg_) and returns its result.
  bool Parse(const char* str, int n) const;

 private:
  void*         arg_;     // type-erased destination handed to parser_
  Parser        parser_;  // function invoked by Parse()

  // Parsers for the non-integer destination types (defined outside this
  // header).  parse_null is the parser installed by Arg() and Arg(void*).
  static bool parse_null          (const char* str, int n, void* dest);
  static bool parse_char          (const char* str, int n, void* dest);
  static bool parse_uchar         (const char* str, int n, void* dest);
  static bool parse_float         (const char* str, int n, void* dest);
  static bool parse_double        (const char* str, int n, void* dest);
  static bool parse_string        (const char* str, int n, void* dest);
  static bool parse_stringpiece   (const char* str, int n, void* dest);

// For each integer type "name" this declares:
//   private: parse_<name>        -- the default parser used by the constructor
//            parse_<name>_radix  -- same, with an explicit radix argument
//   public:  parse_<name>_hex, parse_<name>_octal, parse_<name>_cradix
// The last three are public because the free functions Hex(), Octal() and
// CRadix() defined after the class pass them to the (pointer, Parser)
// constructors.  Note that each expansion ends in the "public:" section.
#define PCRE_DECLARE_INTEGER_PARSER(name)                                   \
 private:                                                                   \
  static bool parse_ ## name(const char* str, int n, void* dest);           \
  static bool parse_ ## name ## _radix(                                     \
    const char* str, int n, void* dest, int radix);                         \
 public:                                                                    \
  static bool parse_ ## name ## _hex(const char* str, int n, void* dest);   \
  static bool parse_ ## name ## _octal(const char* str, int n, void* dest); \
  static bool parse_ ## name ## _cradix(const char* str, int n, void* dest)

  PCRE_DECLARE_INTEGER_PARSER(short);
  PCRE_DECLARE_INTEGER_PARSER(ushort);
  PCRE_DECLARE_INTEGER_PARSER(int);
  PCRE_DECLARE_INTEGER_PARSER(uint);
  PCRE_DECLARE_INTEGER_PARSER(long);
  PCRE_DECLARE_INTEGER_PARSER(ulong);
  PCRE_DECLARE_INTEGER_PARSER(longlong);
  PCRE_DECLARE_INTEGER_PARSER(ulonglong);

#undef PCRE_DECLARE_INTEGER_PARSER
};

// Default Arg: no destination at all, paired with the parse_null parser.
inline Arg::Arg()
    : arg_(NULL),
      parser_(parse_null) {
}

// Arg built from an untyped pointer: the pointer is stored as given, and the
// parser is still parse_null.
inline Arg::Arg(void* untyped)
    : arg_(untyped),
      parser_(parse_null) {
}

inline bool Arg::Parse(const char* str, int n) const {
  return (*parser_)(str, n, arg_);
}

// This part of the parser, appropriate only for ints, deals with bases
//
// For every integer type, MAKE_INTEGER_PARSER defines three free functions
// in namespace pcrecpp -- Hex(), Octal() and CRadix() -- each of which wraps
// a pointer in an Arg whose parser is the matching public
// Arg::parse_<name>_hex / _octal / _cradix function.  "type" is the C++ type
// of the pointee and "name" is the suffix used in the parser names.
#define MAKE_INTEGER_PARSER(type, name) \
  inline Arg Hex(type* ptr) { \
    return Arg(ptr, Arg::parse_ ## name ## _hex); } \
  inline Arg Octal(type* ptr) { \
    return Arg(ptr, Arg::parse_ ## name ## _octal); } \
  inline Arg CRadix(type* ptr) { \
    return Arg(ptr, Arg::parse_ ## name ## _cradix); }

MAKE_INTEGER_PARSER(short,              short)     /*                        */
MAKE_INTEGER_PARSER(unsigned short,     ushort)    /*                        */
MAKE_INTEGER_PARSER(int,                int)       /* Don't use semicolons   */
MAKE_INTEGER_PARSER(unsigned int,       uint)      /* after these statements */
MAKE_INTEGER_PARSER(long,               long)      /* because they can cause */
MAKE_INTEGER_PARSER(unsigned long,      ulong)     /* compiler warnings if   */
#if 1                          /* the checking level is  */
MAKE_INTEGER_PARSER(long long,          longlong)  /* turned up high enough. */
#endif                                             /*                        */
#if 1                         /*                        */
MAKE_INTEGER_PARSER(unsigned long long, ulonglong) /*                        */
#endif

// NOTE(review): PCRE_IS_SET and PCRE_SET_OR_CLEAR are not defined anywhere in
// this header, so the first two #undefs only matter if an including file
// defined them beforehand -- confirm they are intentional.
#undef PCRE_IS_SET
#undef PCRE_SET_OR_CLEAR
#undef MAKE_INTEGER_PARSER

}   // namespace pcrecpp


#endif /* _PCRECPPARG_H */
usr/include/pcreposix.h000064400000012400150403561400011160 0ustar00/*************************************************
*       Perl-Compatible Regular Expressions      *
*************************************************/

#ifndef _PCREPOSIX_H
#define _PCREPOSIX_H

/* This is the header for the POSIX wrapper interface to the PCRE Perl-
Compatible Regular Expression library. It defines the things POSIX says should
be there. I hope.

            Copyright (c) 1997-2009 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/

/* Have to include stdlib.h in order to ensure that size_t is defined. */

#include <stdlib.h>

/* Allow for C++ users */

#ifdef __cplusplus
extern "C" {
#endif

/* Options, mostly defined by POSIX, but with some extras. Every option is a
distinct single bit, so any combination can be OR-ed into one int flag word.
The comment on each line names the PCRE option it maps to. */

#define REG_ICASE     0x0001   /* Maps to PCRE_CASELESS */
#define REG_NEWLINE   0x0002   /* Maps to PCRE_MULTILINE */
#define REG_NOTBOL    0x0004   /* Maps to PCRE_NOTBOL */
#define REG_NOTEOL    0x0008   /* Maps to PCRE_NOTEOL */
#define REG_DOTALL    0x0010   /* NOT defined by POSIX; maps to PCRE_DOTALL */
#define REG_NOSUB     0x0020   /* Maps to PCRE_NO_AUTO_CAPTURE */
#define REG_UTF8      0x0040   /* NOT defined by POSIX; maps to PCRE_UTF8 */
#define REG_STARTEND  0x0080   /* BSD feature: pass subject string by so,eo */
#define REG_NOTEMPTY  0x0100   /* NOT defined by POSIX; maps to PCRE_NOTEMPTY */
#define REG_UNGREEDY  0x0200   /* NOT defined by POSIX; maps to PCRE_UNGREEDY */

/* This is not used by PCRE, but by defining it we make it easier
to slot PCRE into existing programs that make POSIX calls. Because its value
is 0, OR-ing it into a flag word leaves the other options unchanged. */

#define REG_EXTENDED  0

/* Error values. Not all these are relevant or used by the wrapper. */

/* Error codes, numbered consecutively from 1; the values are written out
explicitly so the numeric code of each name is visible at a glance. */
enum {
  REG_ASSERT   = 1,   /* internal error ? */
  REG_BADBR    = 2,   /* invalid repeat counts in {} */
  REG_BADPAT   = 3,   /* pattern error */
  REG_BADRPT   = 4,   /* ? * + invalid */
  REG_EBRACE   = 5,   /* unbalanced {} */
  REG_EBRACK   = 6,   /* unbalanced [] */
  REG_ECOLLATE = 7,   /* collation error - not relevant */
  REG_ECTYPE   = 8,   /* bad class */
  REG_EESCAPE  = 9,   /* bad escape sequence */
  REG_EMPTY    = 10,  /* empty expression */
  REG_EPAREN   = 11,  /* unbalanced () */
  REG_ERANGE   = 12,  /* bad range inside [] */
  REG_ESIZE    = 13,  /* expression too big */
  REG_ESPACE   = 14,  /* failed to get memory */
  REG_ESUBREG  = 15,  /* bad back reference */
  REG_INVARG   = 16,  /* bad argument */
  REG_NOMATCH  = 17   /* match failed */
};


/* The structure representing a compiled regular expression. The functions
declared at the end of this header take a pointer to it. */

typedef struct {
  void *re_pcre;        /* opaque pointer; presumably the compiled PCRE
                           pattern -- confirm in pcreposix.c */
  size_t re_nsub;       /* presumably the number of capturing subpatterns,
                           as in POSIX -- confirm in pcreposix.c */
  size_t re_erroffset;  /* presumably the offset in the pattern at which a
                           compile error was found -- confirm in pcreposix.c */
} regex_t;

/* The structure in which a captured offset is returned. regoff_t is the
integer type of an offset; in this wrapper it is a plain int. */

typedef int regoff_t;

typedef struct {
  regoff_t rm_so;   /* presumably the start offset of the match, as in
                       POSIX -- confirm in pcreposix.c */
  regoff_t rm_eo;   /* presumably the offset just past the end of the match,
                       as in POSIX -- confirm in pcreposix.c */
} regmatch_t;

/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
export settings are needed, and are set in pcreposix.c before including this
file. */

#if defined(_WIN32) && !defined(PCRE_STATIC) && !defined(PCREPOSIX_EXP_DECL)
#  define PCREPOSIX_EXP_DECL  extern __declspec(dllimport)
#  define PCREPOSIX_EXP_DEFN  __declspec(dllimport)
#endif

/* By default, we use the standard "extern" declarations. */

#ifndef PCREPOSIX_EXP_DECL
#  ifdef __cplusplus
#    define PCREPOSIX_EXP_DECL  extern "C"
#    define PCREPOSIX_EXP_DEFN  extern "C"
#  else
#    define PCREPOSIX_EXP_DECL  extern
#    define PCREPOSIX_EXP_DEFN  extern
#  endif
#endif

/* The functions. These are the four entry points of the POSIX wrapper. The
prototypes do not name their parameters; the roles given below follow the
POSIX regcomp/regexec/regerror/regfree convention and should be confirmed
against pcreposix.c.

  regcomp(preg, pattern, cflags)          presumably compiles pattern into
                                          *preg
  regexec(preg, string, nmatch, pmatch,   presumably matches string against
          eflags)                         *preg, filling up to nmatch entries
                                          of pmatch
  regerror(errcode, preg, errbuf,         presumably writes a message for
           errbuf_size)                   errcode into errbuf
  regfree(preg)                           presumably releases what regcomp
                                          stored in *preg
*/

PCREPOSIX_EXP_DECL int regcomp(regex_t *, const char *, int);
PCREPOSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t,
                     regmatch_t *, int);
PCREPOSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t);
PCREPOSIX_EXP_DECL void regfree(regex_t *);

#ifdef __cplusplus
}   /* extern "C" */
#endif

#endif /* End of pcreposix.h */
usr/include/pcrecpp.h000064400000063641150403561410010616 0ustar00// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Sanjay Ghemawat
// Support for PCRE_XXX modifiers added by Giuseppe Maxia, July 2005

#ifndef _PCRECPP_H
#define _PCRECPP_H

// C++ interface to the pcre regular-expression library.  RE supports
// Perl-style regular expressions (with extensions like \d, \w, \s,
// ...).
//
// -----------------------------------------------------------------------
// REGEXP SYNTAX:
//
// This module is part of the pcre library and hence supports its syntax
// for regular expressions.
//
// The syntax is pretty similar to Perl's.  For those not familiar
// with Perl's regular expressions, here are some examples of the most
// commonly used extensions:
//
//   "hello (\\w+) world"  -- \w matches a "word" character
//   "version (\\d+)"      -- \d matches a digit
//   "hello\\s+world"      -- \s matches any whitespace character
//   "\\b(\\w+)\\b"        -- \b matches empty string at a word boundary
//   "(?i)hello"           -- (?i) turns on case-insensitive matching
//   "/\\*(.*?)\\*/"       -- .*? matches . minimum no. of times possible
//
// -----------------------------------------------------------------------
// MATCHING INTERFACE:
//
// The "FullMatch" operation checks that supplied text matches a
// supplied pattern exactly.
//
// Example: successful match
//    pcrecpp::RE re("h.*o");
//    re.FullMatch("hello");
//
// Example: unsuccessful match (requires full match):
//    pcrecpp::RE re("e");
//    !re.FullMatch("hello");
//
// Example: creating a temporary RE object:
//    pcrecpp::RE("h.*o").FullMatch("hello");
//
// You can pass in a "const char*" or a "string" for "text".  The
// examples below tend to use a const char*.
//
// You can, as in the different examples above, store the RE object
// explicitly in a variable or use a temporary RE object.  The
// examples below use one mode or the other arbitrarily.  Either
// could correctly be used for any of these examples.
//
// -----------------------------------------------------------------------
// MATCHING WITH SUB-STRING EXTRACTION:
//
// You can supply extra pointer arguments to extract matched subpieces.
//
// Example: extracts "ruby" into "s" and 1234 into "i"
//    int i;
//    string s;
//    pcrecpp::RE re("(\\w+):(\\d+)");
//    re.FullMatch("ruby:1234", &s, &i);
//
// Example: does not try to extract any extra sub-patterns
//    re.FullMatch("ruby:1234", &s);
//
// Example: does not try to extract into NULL
//    re.FullMatch("ruby:1234", NULL, &i);
//
// Example: integer overflow causes failure
//    !re.FullMatch("ruby:1234567891234", NULL, &i);
//
// Example: fails because there aren't enough sub-patterns:
//    !pcrecpp::RE("\\w+:\\d+").FullMatch("ruby:1234", &s);
//
// Example: fails because string cannot be stored in integer
//    !pcrecpp::RE("(.*)").FullMatch("ruby", &i);
//
// The provided pointer arguments can be pointers to any scalar numeric
// type, or one of
//    string        (matched piece is copied to string)
//    StringPiece   (StringPiece is mutated to point to matched piece)
//    T             (where "bool T::ParseFrom(const char*, int)" exists)
//    NULL          (the corresponding matched sub-pattern is not copied)
//
// CAVEAT: An optional sub-pattern that does not exist in the matched
// string is assigned the empty string.  Therefore, the following will
// return false (because the empty string is not a valid number):
//    int number;
//    pcrecpp::RE("[a-z]+(\\d+)?").FullMatch("abc", &number);
//
// -----------------------------------------------------------------------
// DO_MATCH
//
// The matching interface supports at most 16 arguments per call.
// If you need more, consider using the more general interface
// pcrecpp::RE::DoMatch().  See pcrecpp.h for the signature for DoMatch.
//
// -----------------------------------------------------------------------
// PARTIAL MATCHES
//
// You can use the "PartialMatch" operation when you want the pattern
// to match any substring of the text.
//
// Example: simple search for a string:
//    pcrecpp::RE("ell").PartialMatch("hello");
//
// Example: find first number in a string:
//    int number;
//    pcrecpp::RE re("(\\d+)");
//    re.PartialMatch("x*100 + 20", &number);
//    assert(number == 100);
//
// -----------------------------------------------------------------------
// UTF-8 AND THE MATCHING INTERFACE:
//
// By default, pattern and text are plain text, one byte per character.
// The UTF8 flag, passed to the constructor, causes both pattern
// and string to be treated as UTF-8 text, still a byte stream but
// potentially multiple bytes per character. In practice, the text
// is likelier to be UTF-8 than the pattern, but the match returned
// may depend on the UTF8 flag, so always use it when matching
// UTF8 text.  E.g., "." will match one byte normally but with UTF8
// set may match up to three bytes of a multi-byte character.
//
// Example:
//    pcrecpp::RE_Options options;
//    options.set_utf8(true);
//    pcrecpp::RE re(utf8_pattern, options);
//    re.FullMatch(utf8_string);
//
// Example: using the convenience function UTF8():
//    pcrecpp::RE re(utf8_pattern, pcrecpp::UTF8());
//    re.FullMatch(utf8_string);
//
// NOTE: The UTF8 option is ignored if pcre was not configured with the
//       --enable-utf8 flag.
//
// -----------------------------------------------------------------------
// PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE
//
// PCRE defines some modifiers to change the behavior of the regular
// expression engine.
// The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle
// to pass such modifiers to a RE class.
//
// Currently, the following modifiers are supported
//
//    modifier              description               Perl corresponding
//
//    PCRE_CASELESS         case insensitive match    /i
//    PCRE_MULTILINE        multiple lines match      /m
//    PCRE_DOTALL           dot matches newlines      /s
//    PCRE_DOLLAR_ENDONLY   $ matches only at end     N/A
//    PCRE_EXTRA            strict escape parsing     N/A
//    PCRE_EXTENDED         ignore whitespaces        /x
//    PCRE_UTF8             handles UTF8 chars        built-in
//    PCRE_UNGREEDY         reverses * and *?         N/A
//    PCRE_NO_AUTO_CAPTURE  disables matching parens  N/A (*)
//
// (For a full account on how each modifier works, please check the
// PCRE API reference manual).
//
// (*) Both Perl and PCRE allow non matching parentheses by means of the
// "?:" modifier within the pattern itself. e.g. (?:ab|cd) does not
// capture, while (ab|cd) does.
//
// For each modifier, there are two member functions whose name is made
// out of the modifier in lowercase, without the "PCRE_" prefix. For
// instance, PCRE_CASELESS is handled by
//    bool caseless(),
// which returns true if the modifier is set, and
//    RE_Options & set_caseless(bool),
// which sets or unsets the modifier.
//
// Moreover, PCRE_EXTRA_MATCH_LIMIT can be accessed through the
// set_match_limit() and match_limit() member functions.
// Setting match_limit to a non-zero value will limit the execution of
// pcre to keep it from doing bad things like blowing the stack or taking
// an eternity to return a result.  A value of 5000 is good enough to stop
// stack blowup in a 2MB thread stack.  Setting match_limit to zero will
// disable match limiting.  Alternately, you can set match_limit_recursion()
// which uses PCRE_EXTRA_MATCH_LIMIT_RECURSION to limit how much pcre
// recurses.  match_limit() caps the number of matches pcre does;
// match_limit_recursion() caps the depth of recursion.
//
// Normally, to pass one or more modifiers to a RE class, you declare
// a RE_Options object, set the appropriate options, and pass this
// object to a RE constructor. Example:
//
//    RE_Options opt;
//    opt.set_caseless(true);
//
//    if (RE("HELLO", opt).PartialMatch("hello world")) ...
//
// RE_Options has two constructors. The default constructor takes no
// arguments and creates a set of flags that are off by default.
//
// The optional parameter 'option_flags' is to facilitate transfer
// of legacy code from C programs.  This lets you do
//    RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
//
// But new code is better off doing
//    RE(pattern,
//      RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
// (See below)
//
// If you are going to pass one of the most used modifiers, there are some
// convenience functions that return a RE_Options class with the
// appropriate modifier already set:
// CASELESS(), UTF8(), MULTILINE(), DOTALL(), EXTENDED()
//
// If you need to set several options at once, and you don't want to go
// through the pains of declaring a RE_Options object and setting several
// options, there is a parallel method that gives you such ability on the
// fly. You can concatenate several set_xxxxx member functions, since each
// of them returns a reference to its class object.  e.g.: to pass
// PCRE_CASELESS, PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one
// statement, you may write
//
//    RE(" ^ xyz \\s+ .* blah$", RE_Options()
//                            .set_caseless(true)
//                            .set_extended(true)
//                            .set_multiline(true)).PartialMatch(sometext);
//
// -----------------------------------------------------------------------
// SCANNING TEXT INCREMENTALLY
//
// The "Consume" operation may be useful if you want to repeatedly
// match regular expressions at the front of a string and skip over
// them as they match.  This requires use of the "StringPiece" type,
// which represents a sub-range of a real string.  Like RE, StringPiece
// is defined in the pcrecpp namespace.
//
// Example: read lines of the form "var = value" from a string.
//    string contents = ...;                 // Fill string somehow
//    pcrecpp::StringPiece input(contents);  // Wrap in a StringPiece
//
//    string var;
//    int value;
//    pcrecpp::RE re("(\\w+) = (\\d+)\n");
//    while (re.Consume(&input, &var, &value)) {
//      ...;
//    }
//
// Each successful call to "Consume" will set "var/value", and also
// advance "input" so it points past the matched text.
//
// The "FindAndConsume" operation is similar to "Consume" but does not
// anchor your match at the beginning of the string.  For example, you
// could extract all words from a string by repeatedly calling
//     pcrecpp::RE("(\\w+)").FindAndConsume(&input, &word)
//
// -----------------------------------------------------------------------
// PARSING HEX/OCTAL/C-RADIX NUMBERS
//
// By default, if you pass a pointer to a numeric value, the
// corresponding text is interpreted as a base-10 number.  You can
// instead wrap the pointer with a call to one of the operators Hex(),
// Octal(), or CRadix() to interpret the text in another base.  The
// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
// prefixes, but defaults to base-10.
//
// Example:
//   int a, b, c, d;
//   pcrecpp::RE re("(.*) (.*) (.*) (.*)");
//   re.FullMatch("100 40 0100 0x40",
//                pcrecpp::Octal(&a), pcrecpp::Hex(&b),
//                pcrecpp::CRadix(&c), pcrecpp::CRadix(&d));
// will leave 64 in a, b, c, and d.
//
// -----------------------------------------------------------------------
// REPLACING PARTS OF STRINGS
//
// You can replace the first match of "pattern" in "str" with
// "rewrite".  Within "rewrite", backslash-escaped digits (\1 to \9)
// can be used to insert text matching corresponding parenthesized
// group from the pattern.  \0 in "rewrite" refers to the entire
// matching text.  E.g.,
//
//   string s = "yabba dabba doo";
//   pcrecpp::RE("b+").Replace("d", &s);
//
// will leave "s" containing "yada dabba doo".  The result is true if
// the pattern matches and a replacement occurs, or false otherwise.
//
// GlobalReplace() is like Replace(), except that it replaces all
// occurrences of the pattern in the string with the rewrite.
// Replacements are not subject to re-matching.  E.g.,
//
//   string s = "yabba dabba doo";
//   pcrecpp::RE("b+").GlobalReplace("d", &s);
//
// will leave "s" containing "yada dada doo".  It returns the number
// of replacements made.
//
// Extract() is like Replace(), except that if the pattern matches,
// "rewrite" is copied into "out" (an additional argument) with
// substitutions.  The non-matching portions of "text" are ignored.
// Returns true iff a match occurred and the extraction happened
// successfully.  If no match occurs, the string is left unaffected.


#include <string>
#include <pcre.h>
#include <pcrecpparg.h>   // defines the Arg class
// This isn't technically needed here, but we include it
// anyway so folks who include pcrecpp.h don't have to.
#include <pcre_stringpiece.h>

namespace pcrecpp {

// Body of every RE_Options::set_xxx(bool) mutator: turns the option bit(s)
// "o" on in all_options_ when "b" is true, off otherwise, and then returns
// *this so calls can be chained.  It expands to two statements ending in a
// "return", so it is only usable as the entire body of a member function.
#define PCRE_SET_OR_CLEAR(b, o) \
    if (b) all_options_ |= (o); else all_options_ &= ~(o); \
    return *this

// True when every bit of "o" is set in all_options_.  Both the argument and
// the whole expansion are parenthesised so that a compound argument such as
// (A | B), or a use such as !PCRE_IS_SET(x), is evaluated as intended
// ("==" binds tighter than "&" and "|", and "!" tighter than all of them).
#define PCRE_IS_SET(o)  \
        ((all_options_ & (o)) == (o))

/***** Compiling regular expressions: the RE class *****/

// RE_Options allow you to set options to be passed along to pcre,
// along with other options we put on top of pcre.
// Only 9 modifiers, plus match_limit and match_limit_recursion,
// are supported now.
class PCRECPP_EXP_DEFN RE_Options {
 public:
  // constructor
  RE_Options() : match_limit_(0), match_limit_recursion_(0), all_options_(0) {}

  // alternative constructor.
  // To facilitate transfer of legacy code from C programs
  //
  // This lets you do
  //    RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
  // But new code is better off doing
  //    RE(pattern,
  //      RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
  RE_Options(int option_flags) : match_limit_(0), match_limit_recursion_(0),
                                 all_options_(option_flags) {}
  // we're fine with the default destructor, copy constructor, etc.

  // accessors and mutators
  int match_limit() const { return match_limit_; };
  RE_Options &set_match_limit(int limit) {
    match_limit_ = limit;
    return *this;
  }

  int match_limit_recursion() const { return match_limit_recursion_; };
  RE_Options &set_match_limit_recursion(int limit) {
    match_limit_recursion_ = limit;
    return *this;
  }

  bool caseless() const {
    return PCRE_IS_SET(PCRE_CASELESS);
  }
  RE_Options &set_caseless(bool x) {
    PCRE_SET_OR_CLEAR(x, PCRE_CASELESS);
  }

  bool multiline() const {
    return PCRE_IS_SET(PCRE_MULTILINE);
  }
  RE_Options &set_multiline(bool x) {
    PCRE_SET_OR_CLEAR(x, PCRE_MULTILINE);
  }

  bool dotall() const {
    return PCRE_IS_SET(PCRE_DOTALL);
  }
  RE_Options &set_dotall(bool x) {
    PCRE_SET_OR_CLEAR(x, PCRE_DOTALL);
  }

  bool extended() const {
    return PCRE_IS_SET(PCRE_EXTENDED);
  }
  RE_Options &set_extended(bool x) {
    PCRE_SET_OR_CLEAR(x, PCRE_EXTENDED);
  }

  bool dollar_endonly() const {
    return PCRE_IS_SET(PCRE_DOLLAR_ENDONLY);
  }
  RE_Options &set_dollar_endonly(bool x) {
    PCRE_SET_OR_CLEAR(x, PCRE_DOLLAR_ENDONLY);
  }

  bool extra() const {
    return PCRE_IS_SET(PCRE_EXTRA);
  }
  RE_Options &set_extra(bool x) {
    PCRE_SET_OR_CLEAR(x, PCRE_EXTRA);
  }

  bool ungreedy() const {
    return PCRE_IS_SET(PCRE_UNGREEDY);
  }
  RE_Options &set_ungreedy(bool x) {
    PCRE_SET_OR_CLEAR(x, PCRE_UNGREEDY);
  }

  bool utf8() const {
    return PCRE_IS_SET(PCRE_UTF8);
  }
  RE_Options &set_utf8(bool x) {
    PCRE_SET_OR_CLEAR(x, PCRE_UTF8);
  }

  bool no_auto_capture() const {
    return PCRE_IS_SET(PCRE_NO_AUTO_CAPTURE);
  }
  RE_Options &set_no_auto_capture(bool x) {
    PCRE_SET_OR_CLEAR(x, PCRE_NO_AUTO_CAPTURE);
  }

  RE_Options &set_all_options(int opt) {
    all_options_ = opt;
    return *this;
  }
  int all_options() const {
    return all_options_ ;
  }

  // TODO: add other pcre flags

 private:
  int match_limit_;
  int match_limit_recursion_;
  int all_options_;
};

// These functions return some common RE_Options
// Returns a default-constructed RE_Options with only the utf8 modifier set.
static inline RE_Options UTF8() {
  RE_Options opts;
  opts.set_utf8(true);
  return opts;
}

// Returns a default-constructed RE_Options with only the caseless modifier
// set.
static inline RE_Options CASELESS() {
  RE_Options opts;
  opts.set_caseless(true);
  return opts;
}
// Returns a default-constructed RE_Options with only the multiline modifier
// set.
static inline RE_Options MULTILINE() {
  RE_Options opts;
  opts.set_multiline(true);
  return opts;
}

// Returns a default-constructed RE_Options with only the dotall modifier set.
static inline RE_Options DOTALL() {
  RE_Options opts;
  opts.set_dotall(true);
  return opts;
}

// Returns a default-constructed RE_Options with only the extended modifier
// set.
static inline RE_Options EXTENDED() {
  RE_Options opts;
  opts.set_extended(true);
  return opts;
}

// Interface for regular expression matching.  Also corresponds to a
// pre-compiled regular expression.  An "RE" object is safe for
// concurrent use by multiple threads.
// State held per object (see the private section): the pattern text, the
// RE_Options it was built with, two compiled pcre* handles (one for full
// matches, one for partial matches) and a pointer to an error string.
// Every constructor funnels into Init(); the implementations of Init(),
// Cleanup(), the destructor and all matching entry points are out of line
// and not visible in this header.
class PCRECPP_EXP_DEFN RE {
 public:
  // We provide implicit conversions from strings so that users can
  // pass in a string or a "const char*" wherever an "RE" is expected.
  // Overloads without an RE_Options argument pass NULL to Init().
  RE(const string& pat) { Init(pat, NULL); }
  RE(const string& pat, const RE_Options& option) { Init(pat, &option); }
  RE(const char* pat) { Init(pat, NULL); }
  RE(const char* pat, const RE_Options& option) { Init(pat, &option); }
  // The "unsigned char" overloads reinterpret the bytes as plain "char"
  // and otherwise behave like the "const char*" overloads.
  RE(const unsigned char* pat) {
    Init(reinterpret_cast<const char*>(pat), NULL);
  }
  RE(const unsigned char* pat, const RE_Options& option) {
    Init(reinterpret_cast<const char*>(pat), &option);
  }

  // Copy constructor & assignment - note that these are expensive
  // because they recompile the expression.
  RE(const RE& re) { Init(re.pattern_, &re.options_); }
  // Self-assignment is a no-op.  Otherwise the current state is released
  // with Cleanup() and rebuilt by Init() from the source's pattern and
  // options.  Note the return type is a *const* reference.
  const RE& operator=(const RE& re) {
    if (this != &re) {
      Cleanup();

      // This is the code that originally came from Google
      // Init(re.pattern_.c_str(), &re.options_);

      // This is the replacement from Ari Pollak
      Init(re.pattern_, &re.options_);
    }
    return *this;
  }


  ~RE();

  // The string specification for this RE.  E.g.
  //   RE re("ab*c?d+");
  //   re.pattern();    // "ab*c?d+"
  const string& pattern() const { return pattern_; }

  // If RE could not be created properly, returns an error string.
  // Else returns the empty string.
  // error_ is dereferenced unconditionally, so it must never be NULL
  // (the member comment below says it otherwise points to an empty string).
  const string& error() const { return *error_; }

  /***** The useful part: the matching interface *****/

  // This is provided so one can do pattern.ReplaceAll() just as
  // easily as ReplaceAll(pattern-text, ....)

  // Each of the four entry points below accepts up to 16 optional "Arg"
  // parameters, all defaulting to no_arg (which marks the end of the
  // argument list; see its declaration further down).
  // NOTE(review): the exact matching semantics live in the out-of-line
  // definitions; presumably FullMatch and PartialMatch differ in anchoring
  // (compare the Anchor enum below) -- confirm in the implementation.

  bool FullMatch(const StringPiece& text,
                 const Arg& ptr1 = no_arg,
                 const Arg& ptr2 = no_arg,
                 const Arg& ptr3 = no_arg,
                 const Arg& ptr4 = no_arg,
                 const Arg& ptr5 = no_arg,
                 const Arg& ptr6 = no_arg,
                 const Arg& ptr7 = no_arg,
                 const Arg& ptr8 = no_arg,
                 const Arg& ptr9 = no_arg,
                 const Arg& ptr10 = no_arg,
                 const Arg& ptr11 = no_arg,
                 const Arg& ptr12 = no_arg,
                 const Arg& ptr13 = no_arg,
                 const Arg& ptr14 = no_arg,
                 const Arg& ptr15 = no_arg,
                 const Arg& ptr16 = no_arg) const;

  bool PartialMatch(const StringPiece& text,
                    const Arg& ptr1 = no_arg,
                    const Arg& ptr2 = no_arg,
                    const Arg& ptr3 = no_arg,
                    const Arg& ptr4 = no_arg,
                    const Arg& ptr5 = no_arg,
                    const Arg& ptr6 = no_arg,
                    const Arg& ptr7 = no_arg,
                    const Arg& ptr8 = no_arg,
                    const Arg& ptr9 = no_arg,
                    const Arg& ptr10 = no_arg,
                    const Arg& ptr11 = no_arg,
                    const Arg& ptr12 = no_arg,
                    const Arg& ptr13 = no_arg,
                    const Arg& ptr14 = no_arg,
                    const Arg& ptr15 = no_arg,
                    const Arg& ptr16 = no_arg) const;

  // Unlike the two functions above, Consume and FindAndConsume take the
  // input as a non-const StringPiece pointer.
  // NOTE(review): presumably "*input" is advanced past the matched text
  // on success -- confirm in the implementation.
  bool Consume(StringPiece* input,
               const Arg& ptr1 = no_arg,
               const Arg& ptr2 = no_arg,
               const Arg& ptr3 = no_arg,
               const Arg& ptr4 = no_arg,
               const Arg& ptr5 = no_arg,
               const Arg& ptr6 = no_arg,
               const Arg& ptr7 = no_arg,
               const Arg& ptr8 = no_arg,
               const Arg& ptr9 = no_arg,
               const Arg& ptr10 = no_arg,
               const Arg& ptr11 = no_arg,
               const Arg& ptr12 = no_arg,
               const Arg& ptr13 = no_arg,
               const Arg& ptr14 = no_arg,
               const Arg& ptr15 = no_arg,
               const Arg& ptr16 = no_arg) const;

  bool FindAndConsume(StringPiece* input,
                      const Arg& ptr1 = no_arg,
                      const Arg& ptr2 = no_arg,
                      const Arg& ptr3 = no_arg,
                      const Arg& ptr4 = no_arg,
                      const Arg& ptr5 = no_arg,
                      const Arg& ptr6 = no_arg,
                      const Arg& ptr7 = no_arg,
                      const Arg& ptr8 = no_arg,
                      const Arg& ptr9 = no_arg,
                      const Arg& ptr10 = no_arg,
                      const Arg& ptr11 = no_arg,
                      const Arg& ptr12 = no_arg,
                      const Arg& ptr13 = no_arg,
                      const Arg& ptr14 = no_arg,
                      const Arg& ptr15 = no_arg,
                      const Arg& ptr16 = no_arg) const;

  // Replace and GlobalReplace both rewrite "*str" in place using the
  // "rewrite" text; Replace reports a bool and GlobalReplace an int.
  // NOTE(review): presumably Replace handles the first match only and
  // GlobalReplace returns the number of replacements made -- confirm in
  // the implementation.
  bool Replace(const StringPiece& rewrite,
               string *str) const;

  int GlobalReplace(const StringPiece& rewrite,
                    string *str) const;

  // Like the replace functions, but "text" is read-only and the result
  // is delivered through "*out".
  bool Extract(const StringPiece &rewrite,
               const StringPiece &text,
               string *out) const;

  // Escapes all potentially meaningful regexp characters in
  // 'unquoted'.  The returned string, used as a regular expression,
  // will exactly match the original string.  For example,
  //           1.5-2.0?
  // may become:
  //           1\.5\-2\.0\?
  // Note QuoteMeta behaves the same as perl's QuoteMeta function,
  // *except* that it escapes the NUL character (\0) as backslash + 0,
  // rather than backslash + NUL.
  static string QuoteMeta(const StringPiece& unquoted);


  /***** Generic matching interface *****/

  // Type of match (TODO: Should be restructured as part of RE_Options)
  enum Anchor {
    UNANCHORED,         // No anchoring
    ANCHOR_START,       // Anchor at start only
    ANCHOR_BOTH         // Anchor at start and end
  };

  // General matching routine.  Stores the length of the match in
  // "*consumed" if successful.
  // "args" is an array of "n" pointers to Arg objects.
  bool DoMatch(const StringPiece& text,
               Anchor anchor,
               int* consumed,
               const Arg* const* args, int n) const;

  // Return the number of capturing subpatterns, or -1 if the
  // regexp wasn't valid on construction.
  int NumberOfCapturingGroups() const;

  // The default value for an argument, to indicate the end of the argument
  // list. This must be used only in optional argument defaults. It should NOT
  // be passed explicitly. Some people have tried to use it like this:
  //
  //   FullMatch(x, y, &z, no_arg, &w);
  //
  // This is a mistake, and will not work.
  static Arg no_arg;

 private:

  // Init() is the common setup path used by every constructor and by
  // operator=; "options" may be NULL (the option-less constructors pass
  // NULL).  Cleanup() is called by operator= before re-initialising.
  void Init(const string& pattern, const RE_Options* options);
  void Cleanup();

  // Match against "text", filling in "vec" (up to "vecsize" * 2/3) with
  // pairs of integers for the beginning and end positions of matched
  // text.  The first pair corresponds to the entire matched text;
  // subsequent pairs correspond, in order, to parentheses-captured
  // matches.  Returns the number of pairs (one more than the number of
  // the last subpattern with a match) if matching was successful
  // and zero if the match failed.
  // I.e. for RE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching
  // against "foo", "bar", and "baz" respectively.
  // When matching RE("(foo)|hello") against "hello", it will return 1.
  // But the values for all subpatterns are filled into "vec".
  int TryMatch(const StringPiece& text,
               int startpos,
               Anchor anchor,
               bool empty_ok,
               int *vec,
               int vecsize) const;

  // Append the "rewrite" string, with backslash substitutions from "text"
  // and "vec", to string "out".
  bool Rewrite(string *out,
               const StringPiece& rewrite,
               const StringPiece& text,
               int *vec,
               int veclen) const;

  // internal implementation for DoMatch
  // Takes the same leading parameters as DoMatch plus a caller-supplied
  // "vec"/"vecsize" offset buffer.
  bool DoMatchImpl(const StringPiece& text,
                   Anchor anchor,
                   int* consumed,
                   const Arg* const args[],
                   int n,
                   int* vec,
                   int vecsize) const;

  // Compile the regexp for the specified anchoring mode
  // (non-const, unlike the matching functions above).
  pcre* Compile(Anchor anchor);

  string        pattern_;
  RE_Options    options_;
  pcre*         re_full_;       // For full matches
  pcre*         re_partial_;    // For partial matches
  const string* error_;         // Error indicator (or points to empty string)
};

}   // namespace pcrecpp

#endif /* _PCRECPP_H */
usr/include/pcre_stringpiece.h000064400000014110150403561410012472 0ustar00// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Sanjay Ghemawat
//
// A string like object that points into another piece of memory.
// Useful for providing an interface that allows clients to easily
// pass in either a "const char*" or a "string".
//
// Arghh!  I wish C++ literals were automatically of type "string".

#ifndef _PCRE_STRINGPIECE_H
#define _PCRE_STRINGPIECE_H

#include <string.h>
#include <string>
#include <iosfwd>    // for ostream forward-declaration

#if 0
#define HAVE_TYPE_TRAITS
#include <type_traits.h>
#elif 0
#define HAVE_TYPE_TRAITS
#include <bits/type_traits.h>
#endif

#include <pcre.h>

using std::string;

namespace pcrecpp {

// A non-owning (pointer, length) view of a run of bytes.  The bytes are
// never copied by this class and are not required to be NUL terminated.
// A default-constructed or clear()ed piece holds (NULL, 0); every member
// below is written so that such a piece never causes a NULL pointer to be
// handed to strlen() or memcmp().
class PCRECPP_EXP_DEFN StringPiece {
 private:
  const char*   ptr_;
  int           length_;

  // Length of the NUL-terminated string "str", or 0 when "str" is NULL.
  // strlen() must not be called with a null pointer.
  static int CStringLength(const char* str) {
    return (str == NULL) ? 0 : static_cast<int>(strlen(str));
  }

  // memcmp() over the first "n" bytes of "a" and "b", except that a
  // zero-length comparison yields 0 without touching the pointers.
  // Passing a null pointer to memcmp() is undefined behaviour even when
  // the length is zero, and empty pieces legitimately hold NULL.
  static int CompareBytes(const char* a, const char* b, int n) {
    return (n == 0) ? 0 : memcmp(a, b, n);
  }

 public:
  // We provide non-explicit singleton constructors so users can pass
  // in a "const char*" or a "string" wherever a "StringPiece" is
  // expected.  A NULL "const char*" / "const unsigned char*" yields an
  // empty piece whose data() is NULL.
  StringPiece()
    : ptr_(NULL), length_(0) { }
  StringPiece(const char* str)
    : ptr_(str), length_(CStringLength(ptr_)) { }
  StringPiece(const unsigned char* str)
    : ptr_(reinterpret_cast<const char*>(str)),
      length_(CStringLength(ptr_)) { }
  StringPiece(const string& str)
    : ptr_(str.data()), length_(static_cast<int>(str.size())) { }
  StringPiece(const char* offset, int len)
    : ptr_(offset), length_(len) { }

  // data() may return a pointer to a buffer with embedded NULs, and the
  // returned buffer may or may not be null terminated.  Therefore it is
  // typically a mistake to pass data() to a routine that expects a NUL
  // terminated string.  Use "as_string().c_str()" if you really need to do
  // this.  Or better yet, change your routine so it does not rely on NUL
  // termination.
  const char* data() const { return ptr_; }
  int size() const { return length_; }
  bool empty() const { return length_ == 0; }

  // Re-point the piece.  clear() resets it to (NULL, 0); the one-argument
  // set() measures a NUL-terminated string (NULL gives length 0); the
  // two-argument forms take the length on trust.
  void clear() { ptr_ = NULL; length_ = 0; }
  void set(const char* buffer, int len) { ptr_ = buffer; length_ = len; }
  void set(const char* str) {
    ptr_ = str;
    length_ = CStringLength(str);
  }
  void set(const void* buffer, int len) {
    ptr_ = reinterpret_cast<const char*>(buffer);
    length_ = len;
  }

  // No bounds checking: "i" must lie inside [0, size()).
  char operator[](int i) const { return ptr_[i]; }

  // Drop the first / last "n" bytes from the view.  "n" is not checked
  // against size().
  void remove_prefix(int n) {
    ptr_ += n;
    length_ -= n;
  }

  void remove_suffix(int n) {
    length_ -= n;
  }

  // Byte-wise equality: same length and identical contents.
  bool operator==(const StringPiece& x) const {
    return ((length_ == x.length_) &&
            (CompareBytes(ptr_, x.ptr_, length_) == 0));
  }
  bool operator!=(const StringPiece& x) const {
    return !(*this == x);
  }

  // Relational operators: compare the common prefix byte-wise (memcmp
  // ordering); if the prefix is equal, the lengths decide.
#define STRINGPIECE_BINARY_PREDICATE(cmp,auxcmp)                             \
  bool operator cmp (const StringPiece& x) const {                           \
    int r = CompareBytes(ptr_, x.ptr_,                                       \
                         length_ < x.length_ ? length_ : x.length_);         \
    return ((r auxcmp 0) || ((r == 0) && (length_ cmp x.length_)));          \
  }
  STRINGPIECE_BINARY_PREDICATE(<,  <);
  STRINGPIECE_BINARY_PREDICATE(<=, <);
  STRINGPIECE_BINARY_PREDICATE(>=, >);
  STRINGPIECE_BINARY_PREDICATE(>,  >);
#undef STRINGPIECE_BINARY_PREDICATE

  // Three-way comparison: the memcmp() result over the common prefix, or
  // -1 / +1 by length when the common prefix is identical, or 0 when the
  // two pieces are equal.
  int compare(const StringPiece& x) const {
    int r = CompareBytes(ptr_, x.ptr_,
                         length_ < x.length_ ? length_ : x.length_);
    if (r == 0) {
      if (length_ < x.length_) r = -1;
      else if (length_ > x.length_) r = +1;
    }
    return r;
  }

  // Returns an owning copy of the viewed bytes.
  string as_string() const {
    return string(data(), size());
  }

  // Overwrites "*target" with a copy of the viewed bytes.
  void CopyToString(string* target) const {
    target->assign(ptr_, length_);
  }

  // Does "this" start with "x"
  bool starts_with(const StringPiece& x) const {
    return ((length_ >= x.length_) &&
            (CompareBytes(ptr_, x.ptr_, x.length_) == 0));
  }
};

}   // namespace pcrecpp

// ------------------------------------------------------------------
// Functions used to create STL containers that use StringPiece
//  Remember that a StringPiece's lifetime had better be less than
//  that of the underlying string or char*.  If it is not, then you
//  cannot safely store a StringPiece into an STL container
// ------------------------------------------------------------------

#ifdef HAVE_TYPE_TRAITS
// This makes vector<StringPiece> really fast for some STL implementations
// Explicit specialization of __type_traits for pcrecpp::StringPiece that
// marks every trait listed below as __true_type.  It is compiled only when
// HAVE_TYPE_TRAITS is defined; in this header both places that would define
// it sit inside "#if 0" / "#elif 0" branches.
// NOTE(review): __type_traits / __true_type are not standard C++ names;
// presumably they come from the <type_traits.h> or <bits/type_traits.h>
// headers named in those disabled branches -- confirm before enabling.
template<> struct __type_traits<pcrecpp::StringPiece> {
  typedef __true_type    has_trivial_default_constructor;
  typedef __true_type    has_trivial_copy_constructor;
  typedef __true_type    has_trivial_assignment_operator;
  typedef __true_type    has_trivial_destructor;
  typedef __true_type    is_POD_type;
};
#endif

// allow StringPiece to be logged
std::ostream& operator<<(std::ostream& o, const pcrecpp::StringPiece& piece);

#endif /* _PCRE_STRINGPIECE_H */
usr/include/pcre.h000064400000031112150403561420010100 0ustar00/*************************************************
*       Perl-Compatible Regular Expressions      *
*************************************************/

/* This is the public header file for the PCRE library, to be #included by
applications that call the PCRE functions.

           Copyright (c) 1997-2009 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/

#ifndef _PCRE_H
#define _PCRE_H

/* The current PCRE version information. */

#define PCRE_MAJOR          8
#define PCRE_MINOR          02
#define PCRE_PRERELEASE     
#define PCRE_DATE           2010-03-19

/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
export setting is defined in pcre_internal.h, which includes this file. So we
don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */

#if defined(_WIN32) && !defined(PCRE_STATIC)
#  ifndef PCRE_EXP_DECL
#    define PCRE_EXP_DECL  extern __declspec(dllimport)
#  endif
#  ifdef __cplusplus
#    ifndef PCRECPP_EXP_DECL
#      define PCRECPP_EXP_DECL  extern __declspec(dllimport)
#    endif
#    ifndef PCRECPP_EXP_DEFN
#      define PCRECPP_EXP_DEFN  __declspec(dllimport)
#    endif
#  endif
#endif

/* By default, we use the standard "extern" declarations. */

#ifndef PCRE_EXP_DECL
#  ifdef __cplusplus
#    define PCRE_EXP_DECL  extern "C"
#  else
#    define PCRE_EXP_DECL  extern
#  endif
#endif

#ifdef __cplusplus
#  ifndef PCRECPP_EXP_DECL
#    define PCRECPP_EXP_DECL  extern
#  endif
#  ifndef PCRECPP_EXP_DEFN
#    define PCRECPP_EXP_DEFN
#  endif
#endif

/* Have to include stdlib.h in order to ensure that size_t is defined;
it is needed here for malloc. */

#include <stdlib.h>

/* Allow for C++ users */

#ifdef __cplusplus
extern "C" {
#endif

/* Options. Some are compile-time only, some are run-time only, and some are
both, so we keep them all distinct. */

#define PCRE_CASELESS           0x00000001
#define PCRE_MULTILINE          0x00000002
#define PCRE_DOTALL             0x00000004
#define PCRE_EXTENDED           0x00000008
#define PCRE_ANCHORED           0x00000010
#define PCRE_DOLLAR_ENDONLY     0x00000020
#define PCRE_EXTRA              0x00000040
#define PCRE_NOTBOL             0x00000080
#define PCRE_NOTEOL             0x00000100
#define PCRE_UNGREEDY           0x00000200
#define PCRE_NOTEMPTY           0x00000400
#define PCRE_UTF8               0x00000800
#define PCRE_NO_AUTO_CAPTURE    0x00001000
#define PCRE_NO_UTF8_CHECK      0x00002000
#define PCRE_AUTO_CALLOUT       0x00004000
#define PCRE_PARTIAL_SOFT       0x00008000
#define PCRE_PARTIAL            0x00008000  /* Backwards compatible synonym */
#define PCRE_DFA_SHORTEST       0x00010000
#define PCRE_DFA_RESTART        0x00020000
#define PCRE_FIRSTLINE          0x00040000
#define PCRE_DUPNAMES           0x00080000
#define PCRE_NEWLINE_CR         0x00100000
#define PCRE_NEWLINE_LF         0x00200000
#define PCRE_NEWLINE_CRLF       0x00300000
#define PCRE_NEWLINE_ANY        0x00400000
#define PCRE_NEWLINE_ANYCRLF    0x00500000
#define PCRE_BSR_ANYCRLF        0x00800000
#define PCRE_BSR_UNICODE        0x01000000
#define PCRE_JAVASCRIPT_COMPAT  0x02000000
#define PCRE_NO_START_OPTIMIZE  0x04000000
#define PCRE_NO_START_OPTIMISE  0x04000000
#define PCRE_PARTIAL_HARD       0x08000000
#define PCRE_NOTEMPTY_ATSTART   0x10000000

/* Exec-time and get/set-time error codes */

#define PCRE_ERROR_NOMATCH         (-1)
#define PCRE_ERROR_NULL            (-2)
#define PCRE_ERROR_BADOPTION       (-3)
#define PCRE_ERROR_BADMAGIC        (-4)
#define PCRE_ERROR_UNKNOWN_OPCODE  (-5)
#define PCRE_ERROR_UNKNOWN_NODE    (-5)  /* For backward compatibility */
#define PCRE_ERROR_NOMEMORY        (-6)
#define PCRE_ERROR_NOSUBSTRING     (-7)
#define PCRE_ERROR_MATCHLIMIT      (-8)
#define PCRE_ERROR_CALLOUT         (-9)  /* Never used by PCRE itself */
#define PCRE_ERROR_BADUTF8        (-10)
#define PCRE_ERROR_BADUTF8_OFFSET (-11)
#define PCRE_ERROR_PARTIAL        (-12)
#define PCRE_ERROR_BADPARTIAL     (-13)
#define PCRE_ERROR_INTERNAL       (-14)
#define PCRE_ERROR_BADCOUNT       (-15)
#define PCRE_ERROR_DFA_UITEM      (-16)
#define PCRE_ERROR_DFA_UCOND      (-17)
#define PCRE_ERROR_DFA_UMLIMIT    (-18)
#define PCRE_ERROR_DFA_WSSIZE     (-19)
#define PCRE_ERROR_DFA_RECURSE    (-20)
#define PCRE_ERROR_RECURSIONLIMIT (-21)
#define PCRE_ERROR_NULLWSLIMIT    (-22)  /* No longer actually used */
#define PCRE_ERROR_BADNEWLINE     (-23)

/* Request types for pcre_fullinfo() */

#define PCRE_INFO_OPTIONS            0
#define PCRE_INFO_SIZE               1
#define PCRE_INFO_CAPTURECOUNT       2
#define PCRE_INFO_BACKREFMAX         3
#define PCRE_INFO_FIRSTBYTE          4
#define PCRE_INFO_FIRSTCHAR          4  /* For backwards compatibility */
#define PCRE_INFO_FIRSTTABLE         5
#define PCRE_INFO_LASTLITERAL        6
#define PCRE_INFO_NAMEENTRYSIZE      7
#define PCRE_INFO_NAMECOUNT          8
#define PCRE_INFO_NAMETABLE          9
#define PCRE_INFO_STUDYSIZE         10
#define PCRE_INFO_DEFAULT_TABLES    11
#define PCRE_INFO_OKPARTIAL         12
#define PCRE_INFO_JCHANGED          13
#define PCRE_INFO_HASCRORLF         14
#define PCRE_INFO_MINLENGTH         15

/* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */

#define PCRE_CONFIG_UTF8                    0
#define PCRE_CONFIG_NEWLINE                 1
#define PCRE_CONFIG_LINK_SIZE               2
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD  3
#define PCRE_CONFIG_MATCH_LIMIT             4
#define PCRE_CONFIG_STACKRECURSE            5
#define PCRE_CONFIG_UNICODE_PROPERTIES      6
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION   7
#define PCRE_CONFIG_BSR                     8

/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
these bits, just add new ones on the end, in order to remain compatible. */

#define PCRE_EXTRA_STUDY_DATA             0x0001
#define PCRE_EXTRA_MATCH_LIMIT            0x0002
#define PCRE_EXTRA_CALLOUT_DATA           0x0004
#define PCRE_EXTRA_TABLES                 0x0008
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION  0x0010

/* Types */

struct real_pcre;                 /* declaration; the definition is private  */
typedef struct real_pcre pcre;

/* When PCRE is compiled as a C++ library, the subject pointer type can be
replaced with a custom type. For conventional use, the public interface is a
const char *. */

#ifndef PCRE_SPTR
#define PCRE_SPTR const char *
#endif

/* The structure for passing additional data to pcre_exec(). This is defined in
such a way as to be extensible. Always add new fields at the end, in order to
remain compatible. */

/* The "flags" member records which of the other fields have been set; the
bits used for it are the PCRE_EXTRA_xxx values defined earlier in this
header. */
typedef struct pcre_extra {
  unsigned long int flags;        /* Bits for which fields are set */
  void *study_data;               /* Opaque data from pcre_study() */
  unsigned long int match_limit;  /* Maximum number of calls to match() */
  void *callout_data;             /* Data passed back in callouts */
  const unsigned char *tables;    /* Pointer to character tables */
  unsigned long int match_limit_recursion; /* Max recursive calls to match() */
} pcre_extra;

/* The structure for passing out data via the pcre_callout_function. We use a
structure so that new fields can be added on the end in future versions,
without changing the API of the function, thereby allowing old clients to work
without modification. */

/* A pointer to this block is the sole argument of the pcre_callout function
declared below. The fields are grouped by the block version that introduced
them; "version" identifies which version of the block this is. */
typedef struct pcre_callout_block {
  int          version;           /* Identifies version of block */
  /* ------------------------ Version 0 ------------------------------- */
  int          callout_number;    /* Number compiled into pattern */
  int         *offset_vector;     /* The offset vector */
  PCRE_SPTR    subject;           /* The subject being matched */
  int          subject_length;    /* The length of the subject */
  int          start_match;       /* Offset to start of this match attempt */
  int          current_position;  /* Where we currently are in the subject */
  int          capture_top;       /* Max current capture */
  int          capture_last;      /* Most recently closed capture */
  void        *callout_data;      /* Data passed in with the call */
  /* ------------------- Added for Version 1 -------------------------- */
  int          pattern_position;  /* Offset to next item in the pattern */
  int          next_item_length;  /* Length of next item in the pattern */
  /* ------------------------------------------------------------------ */
} pcre_callout_block;

/* Indirection for store get and free functions. These can be set to
alternative malloc/free functions if required. Special ones are used in the
non-recursive case for "frames". There is also an optional callout function
that is triggered by the (?C) regex item. For Virtual Pascal, these definitions
have to take another form. */

#ifndef VPCOMPAT
PCRE_EXP_DECL void *(*pcre_malloc)(size_t);
PCRE_EXP_DECL void  (*pcre_free)(void *);
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
PCRE_EXP_DECL void  (*pcre_stack_free)(void *);
PCRE_EXP_DECL int   (*pcre_callout)(pcre_callout_block *);
#else   /* VPCOMPAT */
PCRE_EXP_DECL void *pcre_malloc(size_t);
PCRE_EXP_DECL void  pcre_free(void *);
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
PCRE_EXP_DECL void  pcre_stack_free(void *);
PCRE_EXP_DECL int   pcre_callout(pcre_callout_block *);
#endif  /* VPCOMPAT */

/* Exported PCRE functions */

PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
                  const unsigned char *);
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
                  int *, const unsigned char *);
PCRE_EXP_DECL int  pcre_config(int, void *);
PCRE_EXP_DECL int  pcre_copy_named_substring(const pcre *, const char *,
                  int *, int, const char *, char *, int);
PCRE_EXP_DECL int  pcre_copy_substring(const char *, int *, int, int, char *,
                  int);
PCRE_EXP_DECL int  pcre_dfa_exec(const pcre *, const pcre_extra *,
                  const char *, int, int, int, int *, int , int *, int);
PCRE_EXP_DECL int  pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
                   int, int, int, int *, int);
PCRE_EXP_DECL void pcre_free_substring(const char *);
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
PCRE_EXP_DECL int  pcre_fullinfo(const pcre *, const pcre_extra *, int,
                  void *);
PCRE_EXP_DECL int  pcre_get_named_substring(const pcre *, const char *,
                  int *, int, const char *, const char **);
PCRE_EXP_DECL int  pcre_get_stringnumber(const pcre *, const char *);
PCRE_EXP_DECL int  pcre_get_stringtable_entries(const pcre *, const char *,
                  char **, char **);
PCRE_EXP_DECL int  pcre_get_substring(const char *, int *, int, int,
                  const char **);
PCRE_EXP_DECL int  pcre_get_substring_list(const char *, int *, int,
                  const char ***);
PCRE_EXP_DECL int  pcre_info(const pcre *, int *, int *);
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
PCRE_EXP_DECL int  pcre_refcount(pcre *, int);
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
PCRE_EXP_DECL const char *pcre_version(void);

#ifdef __cplusplus
}  /* extern "C" */
#endif

#endif /* End of pcre.h */
usr/include/pcre_scanner.h000064400000014710150403561420011616 0ustar00// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Sanjay Ghemawat
//
// Regular-expression based scanner for parsing an input stream.
//
// Example 1: parse a sequence of "var = number" entries from input:
//
//      Scanner scanner(input);
//      string var;
//      int number;
//      scanner.SetSkipExpression("\\s+"); // Skip any white space we encounter
//      while (scanner.Consume("(\\w+) = (\\d+)", &var, &number)) {
//        ...;
//      }

#ifndef _PCRE_SCANNER_H
#define _PCRE_SCANNER_H

#include <assert.h>
#include <string>
#include <vector>

#include <pcrecpp.h>
#include <pcre_stringpiece.h>

namespace pcrecpp {

// Scanner walks over a block of input text, repeatedly matching regular
// expressions against the start of the not-yet-consumed input.  An optional
// "skip" expression lets callers automatically discard uninteresting text
// (white space, comments, ...) after each successful Consume().
//
// NOTE(review): the class declares a destructor and holds raw pointers
// (skip_, comments_) but declares no copy constructor or assignment
// operator.  Copying a Scanner is presumably unsafe; confirm against the
// implementation before copying or assigning instances.
class PCRECPP_EXP_DEFN Scanner {
 public:
  // The one-argument form scans "input" (presumably copied into data_;
  // the implementation is not visible in this header).
  Scanner();
  explicit Scanner(const std::string& input);
  ~Scanner();

  // Return current line number.  The returned line-number is
  // one-based.  I.e. it returns 1 + the number of consumed newlines.
  //
  // Note: this method may be slow.  It may take time proportional to
  // the size of the input.
  int LineNumber() const;

  // Return the byte offset within the input data at which the
  // scanner is currently looking.
  int Offset() const;

  // Return true iff the start of the remaining input matches "re"
  bool LookingAt(const RE& re) const;

  // Return true iff all of the following are true
  //    a. the start of the remaining input matches "re",
  //    b. if any arguments are supplied, matched sub-patterns can be
  //       parsed and stored into the arguments.
  // If it returns true, it skips over the matched input and any
  // following input that matches the "skip" regular expression.
  //
  // At most three sub-pattern arguments are supported; arguments that
  // are not supplied default to RE::no_arg.
  bool Consume(const RE& re,
               const Arg& arg0 = RE::no_arg,
               const Arg& arg1 = RE::no_arg,
               const Arg& arg2 = RE::no_arg
               // TODO: Allow more arguments?
               );

  // Set the "skip" regular expression.  If after consuming some data,
  // a prefix of the input matches this RE, it is automatically
  // skipped.  For example, a programming language scanner would use
  // a skip RE that matches white space and comments.
  //
  //    scanner.SetSkipExpression("\\s+|//.*|/[*](.|\n)*?[*]/");
  //
  // Skipping repeats as long as it succeeds.  We used to let people do
  // this by writing "(...)*" in the regular expression, but that added
  // up to lots of recursive calls within the pcre library, so now we
  // control repetition explicitly via the function call API.
  //
  // You can pass NULL for "re" if you do not want any data to be skipped.
  void Skip(const char* re);   // DEPRECATED; does *not* repeat
  void SetSkipExpression(const char* re);

  // Temporarily pause "skip"ing. This
  //   Skip("Foo"); code ; DisableSkip(); code; EnableSkip()
  // is similar to
  //   Skip("Foo"); code ; Skip(NULL); code ; Skip("Foo");
  // but avoids creating/deleting new RE objects.
  void DisableSkip();

  // Reenable previously paused skipping.  Any prefix of the input
  // that matches the skip pattern is immediately dropped.
  void EnableSkip();

  /***** Special wrappers around SetSkipExpression() for some common idioms *****/

  // Arranges to skip whitespace, C comments, C++ comments.
  // The overall RE is a disjunction of the following REs:
  //    \\s                     whitespace
  //    //.*\n                  C++ comment
  //    /[*](?:\n|.)*?[*]/      C comment (x*? means minimal repetitions of x)
  // We get repetition via the semantics of SetSkipExpression, not by using *
  void SkipCXXComments() {
    SetSkipExpression("\\s|//.*\n|/[*](?:\n|.)*?[*]/");
  }

  // Set the save_comments_ flag (see the member's comment below).
  void set_save_comments(bool comments) {
    save_comments_ = comments;
  }

  // Return the current value of the save_comments_ flag.
  // NOTE(review): this accessor is not const-qualified, so it cannot be
  // called through a const Scanner.
  bool save_comments() {
    return save_comments_;
  }

  // Append to vector ranges the comments found in the
  // byte range [start,end] (inclusive) of the input data.
  // Only comments that were extracted entirely within that
  // range are returned: no range splitting of atomically-extracted
  // comments is performed.
  void GetComments(int start, int end, std::vector<StringPiece> *ranges);

  // Append to vector ranges the comments added
  // since the last time this was called. This
  // functionality is provided for efficiency when
  // interleaving scanning with parsing.
  void GetNextComments(std::vector<StringPiece> *ranges);

 private:
  std::string   data_;          // All the input data
  StringPiece   input_;         // Unprocessed input
  RE*           skip_;          // If non-NULL, RE for skipping input
  bool          should_skip_;   // If true, use skip_
  bool          skip_repeat_;   // If true, repeat skip_ as long as it works
  bool          save_comments_; // If true, aggregate the text matched by
                                // the skip expression into comments_

  // the skipped comments
  // TODO: later consider requiring that the StringPieces be added
  // in order by their start position
  std::vector<StringPiece> *comments_;

  // the offset into comments_ that has been returned by GetNextComments
  int           comments_offset_;

  // helper function to consume *skip_ and honour
  // save_comments_
  void ConsumeSkip();
};

}   // namespace pcrecpp

#endif /* _PCRE_SCANNER_H */
usr/bin/pcregrep000075500000114770150403561420007674 0ustar00ELF> 0@8�@8
@@@@00ppp�p�p �{�{ �{ `	�
 `|`| `|   ���  ���DDP�td8k8k8k��Q�tdR�td�{�{ �{ PP/lib64/ld-linux-x86-64.so.2GNU�GNUGNU�;n/-�\E(�s�!++�A �
+.(�BE���|fUa�qX9�}�%� ���zQg��/�K���9 ����`���o�X��6H ��"o�D � �� 
@� 0� � �@� libpcreposix.so.0_ITM_deregisterTMCloneTablepcre_exec__gmon_start___ITM_registerTMCloneTablelibpcre.so.0pcre_studypcre_versionpcre_configpcre_compilepcre_maketableslibc.so.6__printf_chkexitreaddirsetlocalefopenstrncmpclosedirputs__stack_chk_failstdinisattyfgetsstrlen__errno_location__fprintf_chkstdoutfputcfclosestrtoulmallocopendir__ctype_b_locgetenvstderrfilenofwritefreadstrchr__cxa_finalize__sprintf_chk__xstatmemmovestrcmpstrerror__libc_start_mainfree_edata__bss_start_endGLIBC_2.3GLIBC_2.3.4GLIBC_2.4GLIBC_2.2.5/opt/alt/pcre802/usr/lib64�ii
ti	ii
(ui	2�{ 1�{ �0�{ �{ �{ dW�{ �X�{ �X�{ �X| �X| �X| �X| �X | dW(| �X0| �X8| �X@| �XH| �XP| �XX| �X0� dW8� �XP� �XX� �Xh� �� p� Yx� �d�� �� �� *Y�� �d�� 0� �� @Y�� MYȀ �� Ѐ 1Y؀ �d� gY�� e� 0� � mY� {Y(�  � 0� �Y8� @eH� (� P� �YX� �Yp� �Yx� pe�� �Y�� �e�� � �� �Y�� �YЁ Z؁  Z� >Z�� �e� LZ� �e0� XZ8� dZP� }ZX� (fp� �Zx� Xf�� � �� �Z�� �Z�� �Z�� �fȂ � Ђ �Z؂ �Z� �Z�� [� � � [� �f0� ([8� �fP� 4[X� gp� B[x� @g�� H[�� hg�� ؅ �� R[�� �gȃ � Ѓ b[؃ �g� ȅ � r[�� �g� Ѕ � �[� h0� �[8� �[P� �[X� �[p� �[x� @h�� �[�� �[�� \�� hhЄ \؄ �h� \� /\� � � � &� ( � +0� .@� 0�~ �~ �~ �~ �~ �~ �~ �~ 	�~ 
�~ �~ �~ 
�~       ( 0 8 @ H P X ` h p x  � !� "� #� $� %� '� (� )� *��H��H�f H��t��H����5�d �%�d ��h�������h��������h�������h�������h�������h�������h�������h��q������h��a������h	��Q������h
��A������h��1������h��!������h
��������h��������h������h�������h��������h�������h�������h�������h�������h�������h��q������h��a������h��Q������h��A������h��1������h��!������h��������h��������h������h �������h!��������h"�������h#�������h$�������h%��������%Mb D���%Eb D���%=b D���%5b D���%-b D���%%b D���%b D���%b D���%
b D���%b D���%�a D���%�a D���%�a D���%�a D���%�a D���%�a D���%�a D���%�a D���%�a D���%�a D���%�a D���%�a D���%�a D���%�a D���%�a D���%�a D���%}a D���%ua D���%ma D���%ea D���%]a D���%Ua D���%Ma D���%Ea D���%=a D���%5a D���%-a D���%%a D�%���AWAVAUATUSH��H�$H��H�$H���|$<�H�t$0H�t$PdH�%(H��$�#1��h����D$P�����
������
��
=

��
H��8H��f �|$<�D$P�H�D$0H�h�}-��E�D$@E1����{H�L$X�H�L$HH��$�H�L$(�<-�X�}L�u@���SD��@�^` ��t&L�-O` 9�u�Yf�9�tLI�� A�U��u�HcD$PH�L$0�H�ZDL����H�=-e 1��6�����1���
���f.�A�UI�����A�>�����O2A�>@���b���A�UA�Dž����J�1�1�Hc\$P�����L$<��9��
��w
��H�D$0�\$PHc�L�4؃��b�����&I�E�\$PL�0���\$P;\$<},H�L$0Hc�H�,��}-u�E�������H�=e �W	��d ��~�=�d u��d �={d u�sd �
1d �Gd �5d �������������d H��d H��6H����1�H���N���H���P	����H�id H��d H��tPH�5�6H�������t=H�5~6H��������+��c H�=d6�]���H����
H�c H�d �H�=
6H��������KH�5B6H���9������4H�5�5H���"�������H�5 6H����������H�5�5H�������W
H�5�5H���������@
H�5�5H���������
H�5�5H���������H�5Z5H�������tH�5�5H����������Kc D����P�{DH�E�=H��H�D$����}I���
D��\ E����
H��$�L�-�\ D�|$DH�D$ D��+D$�D$8L�t$����)H��M�����I)�H)�H�|$�h�CD�T$8M���5I��H�\$(E���H�
r41�D)�I��H�߾D�T$I�����H��D�M�I��ATH�l$0H�
941���H�����Hc\$L��L�|$ H��L���[���AXAY���vH��H��L���A������`I�� A�}����	I�]�(H������=H��I�����I��M����H��tiI)�H�|$�D$8�|D9�u�Hc�H�|$H��H��������u�L�t$D�|$DI�A�>=�I����H�|$����A�����H�|$H���q������@���D�|$DL�5a2�H�|$�����u����|$@c��HcD$@�\$PL�t�`�@�D$@���A��A�}D���U-�\$PA���u���A��1�1�A�>�D����J�Hc\$P���	���A�}��H���H��2H��` �5���L��D�|$DL�t$H����J���M���A���A�>=�����A�u���A�U���b�����w���H�t$H�
L���'���H�T$X�:�I�U��\$P��������H��1H��_ �
���	��'���H�
�^ �K�H�=�;����+��������_ D����H�-�_ H��t!H�5�1H������������^ H�-v_ H��t!H�5�1H�����������^ � ���� H��H�_ ����H��^ H����H�����|$@�H�-�^ H����H�5f0H���/�������H�-�] L�-�] E1�L��$�H�� L�����H����M��A�I����������!�%����t������D�I�VLD��I��M9�v%����H��fDI��M9�tA�F��DB u�A�A����$��o���D��L���L������W����iHcD$P;D$<�H�L$0�P�D$@�T$PH��H�D$`D�t$@L�l$`A��A��1�I�|�����E����1�1҉��R����1۽L�%|] M��t9D�-x] �D$P1�E��~H�I�<���D$P���D$PA9��L����L�%/] M��t0�D$P1���tH�I�<��y�D$P���D$P9�|�L���b�H��$�#dH3%(����H�ĸ#[]A\A]A^A_�1ҍM������9���H��A9�����H�-�\ H�������H�5/H�������H�5/H���������\ ����H�5�.H���������[ ����H�f.H�X\ �z���H�G.H�E\ �g���H�7.H�2\ �T������d����i���H�5*.H���Y����H�=
[ �e���~�\[ �����������[ D���� �)����A[ �G����=�[ ������H�=�-���H��[ H���uH�=h-��H�k[ H����H�L-����H�
�Z ��1�H�=�-��j����H�
&[ I��H�=TZ H�m7���N����H�=2Z �dH�^61����%�������\$P����H9-�Y tH����1ۃ=�Z ��1�1�L�d$X���H��9�Z ~tH�sZ L�-dZ 1�L��H�<(I��k�H�|$XI�Etƃ=PZ �;Ƅ$�L��$�L�D$XL���1�H�=lY H�e7��k����H�=�Y H��t(L�Z H�L$TH�T$X1���H��Y H���[H�=�Y H��t(L��Y H�L$TH�T$X1��n�H�wY H����H�=oY H��t(L��Y H�L$TH�T$X1��:�H�+Y H����H�=CY H����L�cY H�L$TH�T$X1���H��X H����L�D$X�L$TH�R7�H�=fX ��l����A�غ�1�L��$�H�
�+L�������H��H�5�*���L�-�X H��H�������%��8��H�
�X �H�=�W I��H��01��������D$P;D$<�a�L$<E1��Q�9�A��E1���ŋD$P���D$P;D$<����H�L$0H�1��=�W @��D��H�<��v ��ą�uƒ�AD��HcD$PH�L$0H��*H��H�=HW �1��L��#��� �H�D$0H�43H����L�%W ��t_I�]�=H�����A��A)�H��t.I��L��H�Z3L��1������D$@E1���H��H���H��H��D�@��E�EL��H�F31��L�����V����M�H��H�[4H�=|V �1���{�����1��=!Q ~H�5V H�==V �����n���H�)�m�H��H��3���"������L�D$X�L$TH��4�H�=V �������L�D$X�L$TH�J4�H�=�U ������L�D$X�L$TH��3�H�=�U ���������V D����@���H��H��2���cV D����0���H�=h(�Q�H��������H��H��2�����f.�D��1�I��^H��H���PTL��&H�
&H�=h���O �H�=�T H��T H9�tH�^O H��t	�����H�=�T H�5�T H)�H��H��H��?H�H�tH�5O H��t��fD�����=}T u+UH�=O H��tH�=�J ���d����UT ]������w�����"U ������ATUS�������H9����T H��9L��9��A�����=�~	E���5�H�
�<��
��H9��*�?
�!�H�G�f��?
H�GtH��H9�r�H�GH9�v$�
u��H�G�D���7H9�r�H����H��H9�t�?
u��H�G��H9��5�����H��[]A\�f�H9�s�D�S H�-�8H��8E��A��D�D��A���~	E���vA�J�'A��
��~@A�������A��( A������z����[H��]�A\�f�A��
|z��X����H9�r�>���fDH��H9��+����?
u��H�G� ����H9�r����fDH��H9�������?
u�����f�H9�����H�������H9��e����8
�\����H������1�E�������
���fD��?L�C�IC#�A�i�Hc���E�������L�OE���I��A��?A��D	���u����f���?H�D�@D#�I��ɍ@A��Lc�E���i���H�GDD���H��A��?A��E	ȅ�u��B���f�H��H���dH�%(H��$�1�H���&�1҅�x�D$%�=@�/D�H��$�dH3%(��uH�Ĩ��X��AWAVAUATUSH��H�$H��H�$H��HL�=CG A� H�T$H�\$ A��H��H��F �A��H�
�!dH�%(H��$8 1�HciP �4�M�Ǻ 1�WH���V�AZA[D��Hc�P H��P H�L$H��L��P L�,�H�T$�D�Hc
�P H�vP I�EH�<�t;H�ȃ��gP �H��$8 dH34%(��H��H []A\A]A^A_ÐHc�O I�<�� ��\$H��)É\$��9�~�D$��H�|$H�l$t7H��H�=<O A��1�UL�D$H��&D���-�X1�Z�j���@E����1�E���H�
T"H�=�N �5��=�N H�
�N tDH���9u�D��gfff��D����)�D���)ƒ�ts������tu�th�Af�1H�=�N I��A��1�H�
�N H��%��r�1����H�=QN I���1�H�;%��I�1����f�A�nd�AfD��A�st�AfD�	뇿rd�Af�9�v�����f.��=�N c~GH��H� H��H�=�M H�
�A�d�HE�H�e%1���1�H���������f.�AWAVAUATUSH��H�$H��H�$H��8H��A��I��A��dH�%(H��$( 1���M t}���H�\$ H�H�$H�D$H�D$�DDM��H�
�H��I��I)� �A)�1��I�D��L��D��H�������t0L��H�T$H�4$H�����I�NjD$��u�D��L��D��H������H��$( dH3%(uH��8 []A\A]A^A_��J�f.�AWAVAUATUSH��(dH�%(H�D$1�H9���D��L E����H�D$A��H��I��I��1�H�D$�G�H�T$L��H�����H�
�K �H��H��I��H)���M9�vf��;-QL L��}XM��tH�=�K L��1�H�������K ��t�H�=�K D��E�~1�H���E�����k�����K H�D$dH3%(uH��([]A\A]A^A_��"�f�D�L E����AWI��AVA��AUE1�ATI��UH��SH��H��H��K D��H��E1�D�l$A�J�4�H��K J�<�jcAW���A�$ZY���7����H�=�J ���1�H��"����={K ~!�L$H�=�J H��1������H�
�J ��H�=���H��H��H�
qJ �l�H�5eJ �
�[���J ��u^A�$���t���t��J �R�H�=)J H�*"�1��&��6H�
J �H�=F"���CJ �P�:J ��8�J H��[]A\A]A^A_�I��D9-}J �����1�����Ґ1��H�
�I �&�H�="����v�fDAWAVAUATUSL��$���H��H�$L9�u�H��H�`H�|$pL��$0H��I��L��dH�%(H��$8b1���H�D$XL�H�D$L9��1H��$�M���D$`H�D$PH��$�H�D$@H��$�DŽ$�H�D$0�D$(�D$h�D$�D$lH�D$HH�\$H�T$PL��DŽ$�H���R�Hc�$�D��H L)�H)�H�D$ H��E��tL)�H��Lc|$hL�l$8L��L�l$HL�|$L�|$@�f��^H ���>�\H ���d�RH ����Hc�$���$�H���)�H�Hc�H������H ���9H�5�G �
���Hc�$�H�H)�H��L��L��H���A����5�G 9��S�QB �����
�G ���������=�G ����D�|G E������H�= G M��tL��H�6�1��5�H�=�F �DG ������L$H��1��	��'G H�=�F ������Hc�$�H��D��$�HپH��L)�A)�1���������Hc�$�H��HL$��fDH�=qF �H��1��������@L�1F �H�p1���a��H�=*F �?���DA���D$L�l$8A�ǃ��D$�TF ��t�RF ��t	E����
H�\$ Hc�$�H��H�D$h�I�I��@�D$hI9�vH�|$X�_�zL;d$�%���D�|$`D��E E��uED��E E���7D�
F E��~�D$(��tH�T$H�t$0L��������$�	�E �/@ ���D��E E��t\�|$l���H�=#E ��t"M��tL��H�5�1��4��H�=�D �L$lH�7�1�����
@A�H��$8bdH3%(D����
H��Hb[]A\A]A^A_�fDD�|$A���D$lL�l$8A�G�D$(Hc�$�HD$ �D$`L�H�D$0�D$(�D$�Z���@H�=YD A�ϾL��H��1��p���a���D�=�D I�� E���3�l$(����I�� H9t$0�c�@L������H�L$p� H�߾�o��H@H�D$XL�H�D$H�l$0 ����fDA������A���+D L�l$8���Z�D$(���NL�D$0M9���E1�H��$�H�\$L��D��H�t$8I��D�L$`D�T$x�fD;�C }L��L��H�ǃ��'�L9�r�H�\$D�L$`I��D�T$xL�|$0H;D$0�H�\$H�\$0H�l$`L��L�d$0L�d$8D�L$xL�l$8D�l$(D��$��<H�t$H��L����H�
�B H�߾H��I��H)�L������L9��dM��tH�=�B L��1�H�}���D�=�B E��t�H�=jB D��E�}1�H�Y�E���}���p������B �D$xD�\$xE���+��B �D$8���w�
�B ��~DŽ$�M��t1L��1�D�T$(H�=�A H�D�L$���D�T$(D�L$�B ���D�|$�B A�O�L$(��t\H��$�M��H�D$8��A ���	H�T$8H�t$L��D�T$0D�L$�\�D�|$D�T$0Hc�$�L)�D�L$H)�H�D$ ��A H�
=A ����Hc�$��HT$ L��D�L$�6��D�L$�~���@D�|$A��L�l$8A�G�D$(�`���I�� �@L�����H�L$pH�ߺ ��S���\$(H@H�D$XL�H�D$���������L;d$0��M9���D�D$(E���D$`t���k����=�A I�EM���D$D�=�@ H��$��D$���D�B���!I�@�L9���A�x�
u�p��x�
�^H��L9�u�M��L;D$0�‹|$(�D$x��������t���H��$�H�D$8M9��[H�\$(L��H�l$0H�l$8D�L$`L�l$8D�l$D�T$x�A�H�t$H��H���p�H�
y? H�߾H��I��H)�L�����M9���M��tH�=K? L��1�H�5�e���
{? ��t��L$E�}��1�H�=? H�
D)�E���3���n���fD����A�x�
I�@�u	L9��I��E����L9�w,����D���t@����
��v2I��L9������I�@�E����A�P���uǃ�
t
��
u�DM9��u���L;D$0�t$��9t$8�g����D$`��Z����D$�������H��$�fDI�@�L9��KA�x�
u�_��x�
�NH��L9�u�H9D$0I�������DH��������t�I�������( �������<���@A�H��ʁ����uH����ʁ����t��р�������?H�5+#L�C�[ɉ�$�H�
R#B#���$���E���������$�L�XA�3��I����?��	��u����DI�@�L9��{A�x�
u�Rf.��x�
tBH��L9�u�����@H9�����I���x�
������E���DI��H9�w��3���I���1���H�T$H�t$0L��|$(�*�I�� �@L���F��H�L$p� I��@������D$(H@H�D$XL�H�D$���H�\$H�l$`D�l$(D�L$xD��$�L�d$0L�l$8M9���^< L�|$0�H�
�; �H�=&D�T$8D�L$���D�T$8D�L$�< �D$x����L$�1�D�T$(H�=�; H��
D�L$���D�T$(D�L$���H�\$(H�l$0D�L$`D�T$xL�l$8�5�; ���K�����; ���=����C���@Hc�$�L�L9��$H��$�L��L�d$H�l$E��H�t$8I���f.��A��L��H��H�����A�T$H9�s�L�d$�T$H�T$8H�t$H����Hc�$�L)�H)�H�D$ ��Hc�$�L��D�T$8D�L$���L�n: 1�H�=}: �H������Hc�$����$�H�
L: )�I�<Hc��^��H�=7: �1�H�c��O��D�T$8D�|$L�d$0L�|$HL�t$8E��L�t$@�f�Hc�$�H�
�9 �H������L��9 1�H�=�9 �H�������Hc�$����$�H�
�9 )�H�<Hc����H�=~9 �1�H������Hc�$�L��L��H�H)�A�H��H�������L���Mc�L�d$0�Hc�$�H�
 9 HT$ L)�K�<D�|$L�t$8�%��D�L$�m�Q9 L�d$0�D$x����T$(���„������D$M��D$x������H�
�8 �H�=�
L�D$8D�T$0D�L$(���L�D$8D�T$0D�L$(���Lc�$�M�M9����L��H�\$H�\$H�l$ H�l$8L�d$`D�d$L�l$xA��D�L$(D�T$0����E��A��H��H��H����D��I9�s�D�L$(H�\$D�l$(H�l$ D�T$0D�d$L�l$xL�d$`�X���E��L�d$A�G�D$���A�x�
���I�@�����A��D$(DŽ$�H�D$0�D$l���I�����D��D�L$(H�\$�T$��H�l$ D�T$0L�d$`L�l$x�D$(�����-2 ���
�H�=97 L��1�E1�H��	��P���A�5]7 ���3����H��$�H�D$8� ���L�|$0���������AW�AVA��AUATA��USH��H�=0	H��H���dH�%(H��$�1������uY�=v1 ~@H�5i6 H�=�6 ����H��$�dH3%(���=H���[]A\A]A^A_�uA��t�1��@H���X�A�Ņ�ti�-�6 ��������H�5^H�����I��H���n�=�0 ~L��H����L�������`���uA��t�1���@H�T$ H�޿�&����x��D$8%�=�t��=6 u�f������fDH���8��I��H���DH��$��L�=�H�D$L������H����L�P�L��L�������tչL��H�=_�����t�H��H�
OE��1�ARH�|$�I�ؾL�T$ �M��L�T$ L�����H�|$H�D$(���ZY��L�T$��H�=35 H��t-jL��E1�E1�j�L$(1�L�T$ ���A[Z��L�T$�)���H�=5 H��t#jE1�E1�L��j�L$(1��U��_AX�������H�|$1�D���[��������������
^4 ��������U���8�N��H�='4 H��H��I���1�����n���f�H�=q4 H��t.jE1�E1�L��j�L$(1�L�T$ ���AYAZ��L�T$�V���H�=?4 H���)����G�����5�3 �����������8���H�=�3 H��H�XI���1��������@L���P������������D��������SH�
83 �H�=j��"���=. H�53 ��t,H��- f.���~���H�5�2 H�� �{��u�H��#�H�=������BH�
�2 �H�=�����[��AT�WUSH��dH�%(H�D$1�����H��Hc�H�>����
�2 ���H�\$dH3%(�SH��[]A\��
u2 ����D�.2 ������2 ���f.��2 ���f��.2 ���f���1 ���o������1 ���W������1 ���?������1 ���6, ����D�������fD�, �������~1 ����������H�=1 �H��H��1����1�������1 ��
���f���+ ���������+ ���g�����
�0 ���R���f�H�=	
����H�=5
����H�=a
���H�=�
���H�=�
���H�=�
���H�=����D�)+ E���H�+ H�l$A��h��D$   H�KH��H�5i1�����D��L�CH�
Y)¸H�5`���N�1�H�� �m��D�CE����E��~���H��1�H�
������fD��/ ����/ �-���D��/ ��������H�=^/ �1�H���[���������/���H�=�	�l��H�=5
�`���dH�5d
1�����H�=y
�<��H�=�
�0��1��������@��AWI��AVI��AUA��ATL�%4% UH�-4% SL)�H�����H��t1��L��L��D��A��H��H9�u�H��[]A\A]A^A_Ðf.������H��H���%s%.*s%scommand-line %s-%d-pattern number %d to this text:
%s:%d:%d,%d%c[%sm%c[00m--
%d
..rb%.512s%c%.128sUsage: pcregrep [-Options:-%c,  %s --%s                    %.*s%s
pcregrep version %s
pcregrep: Unknown option -%c
LC_CTYPE--localeLCC_ALLcranyanycrlf%s%.*sautoLC_ALLneveralwaysPCREGREP_COLOURPCREGREP_COLORCRANYANYCRLFreadrecurseskippcregrep: malloc failed
 number %dpcregrep: Unknown option %s
\E\E\b\E)$^(?:\b\Q^(?:\Q  terminate optionshelpdisplay this help and exitafter-context=numberbefore-context=numbercolor=optionmatched text color optioncountcolour=optionmatched text colour optiondevices=actiondirectories=actionhow to handle directoriesregex(p)=patternfixed-stringsfile=pathread patterns from filefile-offsetsoutput file offsets, not textwith-filenameno-filenameignore-caseignore case distinctionsfiles-with-matchesfiles-without-matchlabel=nameset name for standard inputline-offsetslocale=localeuse the named localemultilinerun in multiline modenewline=typeline-numberonly-matchingquietrecursiveexclude=patterninclude=patternexclude_dir=patterninclude_dir=patternno-messagessuppress error messagesutf-8use UTF-8 modeversioninvert-matchselect non-matching linesword-regex(p)line-regex(p)(standard input)1;31pcregrep: Error in command-line regex at offset %d: %s
pcregrep: Error in %s command-line regex at offset %d: %s
pcregrep: Error in regex in line %d of %s at offset %d: %s
pcregrep: Too many %spatterns (max %d)
pcregrep: pcre_exec() error %d while matching pcregrep: error %d means that a resource limit was exceeded
pcregrep: check your regex for nested unlimited loops
pcregrep: too many errors - abandoned
pcregrep: Failed to open directory %s: %s
pcregrep: Failed to open %s: %s
] [long options] [pattern] [files]
Type `pcregrep --help' for more information and the long options.
Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]Search for PATTERN in each FILE or standard input.PATTERN must be present if neither -e nor -f is used."-" can be used as a file name to mean STDIN.All files are read as plain files, without any interpretation.
Example: pcregrep -i 'hello.*world' menu.h main.c

When reading patterns from a file instead of using a command line option,trailing white space is removed and blank lines are ignored.There is a maximum of %d patterns.

With no FILEs, read standard input. If fewer than two FILEs given, assume -h.Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.pcregrep: Data missing after %s
pcregrep: Too many command-line patterns (max %d)
pcregrep: Malformed number "%s" after --%.*s
pcregrep: Malformed number "%s" after -%c
pcregrep: Cannot mix --only-matching, --file-offsets and/or --line-offsets
pcregrep: Failed to set locale %s (obtained from %s)
pcregrep: Unknown colour setting "%s"
pcregrep: Invalid newline specifier "%s"
pcregrep: Invalid value "%s" for -d
pcregrep: Invalid value "%s" for -D
pcregrep: Error while studying regex%s: %s
pcregrep: Error in 'exclude' regex at offset %d: %s
pcregrep: Error in 'include' regex at offset %d: %s
pcregrep: Error in 'exclude_dir' regex at offset %d: %s
pcregrep: Error in 'include_dir' regex at offset %d: %s
pcregrep: Unknown option letter '%c' in "%s"
set number of following context linesset number of prior context linesset number of context lines, before & afterprint only a count of matching lines per FILEhow to handle devices, FIFOs, and socketsspecify pattern (may be used more than once)patterns are sets of newline-separated stringsforce the prefixing filename on outputsuppress the prefixing filename on outputprint only FILE names containing matchesprint only FILE names not containing matchesoutput line numbers and offsets, not textset newline type (CR, LF, CRLF, ANYCRLF or ANY)print line number with output linesshow only the part of the line that matchedsuppress output, just set return coderecursively scan sub-directoriesexclude matching files when recursinginclude matching files when recursingexclude matching directories when recursinginclude matching directories when recursingprint version information and exitforce patterns to match only as wordsforce patterns to match only whole lines�������������������������������������������������������������������������������������������������������X�������������@�����(����������������x��`���P� ��;����������h����x���`���������lx�����x�����l�������(����h�,(���PzRx�(��/D$4����pFJw�?:*3$"\���`Xt���.W�A�A �����p ���Z
ABC�
DHCP���p �������hG�[
At�����B�B�B �B(�A0�A8�G� L�@I�AP�AL�AJ�AB�Av
8A0A(B BBBB}�AM�AZ�AC�Ad��UM{P|X��B�B�B �B(�A0�A8�G� L�@I�@�
8A0A(B BBBAH���B�B�B �B(�A0�A8�D`
8A0A(B BBBAl����R�E�E �E(�D0�D8�GPlXB`JXAP
8A0A(B BBBA`������CP������T�X��yB�B�B �B(�A0�A8�H��Q
G��}
8A0A(B BBBG������B�G�E �B(�D0�A8�T�
\
8A0A(B BBBAI�
N�
y�
A�
[�
K�
R�
A�
[�
K�
L�
B�
��
K�
R�
B�
t���A��0�4��B�D�A �D0]
 AABDd����aF�B�B �B(�A0�A8�G� L�@L�G��GI�Hz�GB�G�
8A0A(B BBBA$zRx��G������,p���	Dl�eF�E�E �E(�H0�H8�G@n8A0A(B BBB�@�1�0�{ dW�X�X�X�X�X�X�XdW�X�X�X�X�X�X�Xb�>�
�V�{ �{ ���o��8
Y�~ � �
@	���o���o�
���o�o*
���op`| �� 0@P`p�������� 0@P`p�������� 0����dW�X�����X�XA�� Y�dB�� *Y�d����0� @YMYC�� 1Y�dcgYe����0� mY{YD � �Y@ed(� �Y�Ye�YpeF�Y�ef� �Y�Y����Z ZH>Z�ehLZ�eiXZdZl}Z(fL�ZXf����� �Z�Z�����Z�f����� �Z�ZM�Z[N� [�fn([�fo4[gqB[@grH[hg����؅ R[�g����� b[�g����ȅ r[�g����Ѕ �[hs�[�[u�[�[V�[@hv�[�[w\hhx\�h\/\
GA$3h86400
GA$3h864��GA$3a1 0O0GA$3a1O0O0
GA$3p864O0O0GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFY�GA*GA!GA*GA!stack_realignGA$3a1O0O0GA$3a1��GA$3a1�V�VGA$3a1P0	1
GA$3p8641\VGA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA*FORTIFYGA+GLIBCXX_ASSERTIONS
GA$3h86400
GA$3h864��GA*�0GA!stack_realignGA!stack_clashGA*cf_protectionGA*
GA*GOW�DGA!
GA$3p864`V�VGA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protection
GA*FORTIFYGA+GLIBCXX_ASSERTIONSGA*GA!GA*GA!stack_realign
GA$3h86400
GA$3h864��GA$3a1�V�VGA$3a1�V�VGA$3a1��GA$3a1�V�Vpcregrep-8.02-9.1.el8.x86_64.debug�o��7zXZ�ִF!t/��_#]?�E�h=��ڊ�2N�/*_��B:�
�3�1�ܖn4��蒶cqO<a6{���u��Jik̡(���\Gg_�ĕ؄��	e'6TX����X,�J�0��~K8�%Ň?�A�]	�l�&-,~�\�<�2��B[�pY�I��K�M�`FS����E���WRUG���[[��S�!�9��=Gv�xoH��͋�_B!3{c�0�
��}ibO?�|��&�W�
:fe�v�c�����5��v*������B{iλV�#;���g�^)F��?.��;Y�'{;Ɛީ���}L��_�Hm_���=������g�<���,���������ܯt*��q�� m�➋����(8"J�T�^�0op��P�<4�}*��1@�v,�[$^�+���`S��]����_�'�*'����0
���,�C-�&),�b{��i��+�р�0 ��/2���F] ̠QG���_�
Bs��/%/ LO�m'�'����P���GF�5�e�:�ˆ�����]p���2s���^�Y�_��9ad�
��>���8#�a�)z/W�F���2�h~ɱ��f�:���m�l�
Q���� �r_,�@3�n8=��6aС�y�)�UK6.�iJn�/�c\��""׆.c$�+&䢊A����,�բ�&�њpg�E���f���SC�DK����ѣڞ�n�yl=��x�(�ez+��$hg*����p��yU��u�\�+�E)E��I��c��g�k˟Т,�EҜx�B�s�vL�Y�ܢ�!�O��ȝH�ͮ?]Z�P�L�f�Lb��f���Q�k����K1N�E���Z$���3�4� �U�s�B��k�d����mRV>�I�����j�Q#�ru-)Ns�=0\�(6o�-/
�ţg����mR�
�ɘ�i�VH���'B~�\eU��|���ϲ��G���Nɭ���4*ѕbUfw�Vњ���YIJ�R��:�#7\Թr�KbHʩ91U��ǔ.�4 �Q���9��"P���M �����!��V����P��M��]|b��&��"
����g�YZ.shstrtab.interp.note.gnu.property.note.ABI-tag.note.gnu.build-id.gnu.hash.dynsym.dynstr.gnu.version.gnu.version_r.rela.dyn.rela.plt.init.plt.sec.text.fini.rodata.eh_frame_hdr.eh_frame.init_array.fini_array.data.rel.ro.dynamic.got.data.bss.gnu.build.attributes.gnu_debuglink.gnu_debugdatapp�� &�� 4��$G���o��<Q88�Y��Ya���o*
*
bn���o�
�
P}�
�
@�B  �������p�@@`���58��V�V
�WW8 �8k8k���k�k���{ �{��{ �{��{ �{� �`| `| ��~ �~p�� �  � �  
@�`�X h�(/��d�>usr/bin/pcretest000075500000152400150403561420007706 0ustar00ELF>0s@@�@8
@@@@00ppp�� 0�0� 0� �� ���� ��   ���  ���DDP�td(�(�(���Q�tdR�td0�0� 0� ��/lib64/ld-linux-x86-64.so.2GNU�GNUGNU��|#��d��5��.�${�
�8�A !�
8;A(�BE���|fUabGsDX[��qXp\�9�f��{��N�~qU ����/�<��(N+���9;{
H��UC ���
��	a����%v�f�R �"��t � � 2�� B0� �8� wH� &� lX� �`� @� �P� libpcreposix.so.0_ITM_deregisterTMCloneTablepcre_infopcre_exec__gmon_start___ITM_registerTMCloneTablepcre_freeregexecregcompregerrorregfreelibpcre.so.0pcre_studypcre_free_substringpcre_versionpcre_stack_mallocpcre_calloutpcre_get_named_substringpcre_mallocpcre_copy_substringpcre_get_substringpcre_copy_named_substringpcre_configpcre_get_stringnumberpcre_stack_freepcre_get_substring_listpcre_compilepcre_fullinfopcre_free_substring_listpcre_dfa_execpcre_maketableslibc.so.6fflushstrcpy__printf_chkexitsetlocalefopen__stack_chk_failclockstdinfgetsstrlen__errno_location__fprintf_chkstdoutfputcfputsmemcpyfclose__ctype_b_locstderrsetrlimitfwritefreadstrchr__ctype_tolower_loc__cxa_finalizegetrlimitmemmovestrcmpstrerror__libc_start_main_edata__bss_start_endGLIBC_2.3GLIBC_2.3.4GLIBC_2.14GLIBC_2.4GLIBC_2.2.5/opt/alt/pcre802/usr/lib64�ii
7ti	A���Mii
Xui	b0� t8� �s@� @� `� q�h� �p� �x� ��� ��� ���� ���� ���� ���� ��� ��� ��� 
�ȸ 
�и �ظ �� �� !�� $��� '�� *�� -�� 0�� 7� � :�(� }�0� =�8� ?�@� A�H� F�P� ��X� M�`� O�h� V�p� R�x� P��� w��� 7��� 7��� 7��� U��� X��� [��� 7��� M�ȹ O�й V�ع R�� P�� w�� 7��� 7�� 7�� U�� X�� [� � 7�(� M�0� O�8� V�@� R�H� P�P� w�X� 7�`� 7�h� 7�p� U�x� X��� [��� 7��� M��� O��� V��� R��� P��� w��� 7�Ⱥ 7�к _�غ ^�� e�� l�� p��� P�� x�� |�� ��� �� � ��(� ��0� ��8� ��@� ��H� ��P� ʘX� ɘ`� Ϙh� Øp� Șx� Θ�� Ԙ�� ݘ�� ��� ��� ���� ���� ��� 
��� �Ȼ �л  �ػ (�� .�� ��� 6�ȿ п ؿ � 3� 5 � 80� ;8� <@� AH� =P� BX� ?`� @0� 8� @� H� P� X� `� h� 	p� 
x� �� �� 
�� �� �� �� �� �� �� Ⱦ о ؾ � � � �� � � �  � ! � "(� #0� $8� %@� &H� 'P� (X� )`� *h� +p� ,x� -�� .�� /�� 0�� 1�� 2�� 4�� 5�� 6�� 7��H��H��� H��t��H����5� �%� ��h�������h��������h�������h�������h�������h�������h�������h��q������h��a������h	��Q������h
��A������h��1������h��!������h
��������h��������h������h�������h��������h�������h�������h�������h�������h�������h��q������h��a������h��Q������h��A������h��1������h��!������h��������h��������h������h �������h!��������h"�������h#�������h$�������h%�������h&�������h'��q������h(��a������h)��Q������h*��A������h+��1������h,��!������h-��������h.��������h/������h0�������h1��������h2��������%�� D���%�� D���%�� D���%�� D���%�� D���%�� D���%�� D���%}� D���%u� D���%m� D���%e� D���%]� D���%U� D���%M� D���%E� D���%=� D���%5� D���%-� D���%%� D���%� D���%� D���%
� D���%� D���%�� D���%�� D���%� D���%� D���%ݚ D���%՚ D���%͚ D���%Ś D���%�� D���%�� D���%�� D���%�� D���%�� D���%�� D���%�� D���%�� D���%}� D���%u� D���%m� D���%e� D���%]� D���%U� D���%M� D���%E� D���%=� D���%5� D���%-� D���%%� D��AWAVAUATUSH��H�$H��(
Hc<� dH�%(H��$1�H�N� A��H��H��H�D$(�k���H��H��� �\���H��H�j� �M���H�V� H��� H��� A���sAH�]L�}DŽ$�A�DŽ$��;-DŽ$�DŽ$�-DŽ$��D$h�D$��@�I��E1�D��DŽ$�M��L�%�jI��DŽ$���H��L��������&�H��H�=�j�������H��H�=qj��������H��H�=Vj��������H��H�=;j�����t6H�5,jH��L�T$D�\$���D�\$L�T$����DŽ$�A����DŽ$�A��uwD��$�Hc�$�H�<����H��$�H������H�5�a�1�����HDŽ$��&fDA�A����A��t�Lc�O�4�I��;-�����D��$�A��L��Hc�$�H�<�����H��$�H���p���Ic�H�5�iH��L�dI�<$���H�D$(H����?A��t$H�\H�5�iH�;�Z���H�� H���}>H�sOD�L$H�w� H�XH��� H��NH�k� H��UH�U� E������$�L�%v� ��$��H�\$(L��H�ei�_� �e� H���UH���zH9ݖ H�=^� tH��H�=� �E���H�=F� ���L�%� �
���H�D$HL�8L���H���E�,GI��fA�� u��i���H�CH�D$A��<��A����
A��\��
H�l$�CH��hf.����(
�UL�}<\�`
A8��c
H��$�H�l$L)�Hc�H-U� I��\uH��$�L��$��\H��I��H��$�H�=� L��E1�L�5�n��?�����$��D$@DŽ$��KH�$�� ��$��D$0�D$��$��D$l�D$�D$8HDŽ$�DŽ$�@��tI�A�I�o<n��
��Ic�L�>���D$���DŽ$����A�OA��I���u�1틜$��$�����$���2H��$�L��$0H�D$H�L$L��D��L��L��$����H�D$ H����#H�|$ 1�1�H��$@��V��$At
�ϔ �=� �"H�D$ �|$8�@H�D$XH��� H�D$p��!H�D$PH�D$�|$@��H�\$ ��{ȉ�CȉC�CȉC�V�{f�C�V�{f�C�sV�{f�C�fV�{f�C�YV�{f�C�LV�{f�C�?V�{f�C�2Vf�CH�D$H��tH�@�ʉ�PʉP�P(ʉP(�|$�-(�|$��"H����"H�5�eH������H��H���|2�D$XH��$H�ٺ�ȉ�$�D$Pȉ�$����H��v#L�|$XH�|$ H�پL�����I9��R4�N����8����H�=X� H��H��hI���1�����H�����H�|$ �USH�D$H��tH���CSH��$�H���W	H���*S�J	A�OA��I������A�OA��I������A�OA��I�����A�O�D$0I�����A�OA��I�������� A�OI�����A�OA��@I�����A�OA��I�����A�O�D$8I�����DŽ$�A�OI���h���A�OA��I���T����� A�OI���=���A�GI��< vf�I��A�< w�H��s�A�1�H�����H���h8��� ���A�H��$�����A�OA��I������A�O�D$I�����A�O�D$lI�����A�O�D$@I�����A�OA�� I���{���A�O�D$I���D$�^���A�OA��@I���J���A�O�D$I���5���A�OA��I���$���A�OA�� I������A�H��tfDH���8u�H�\$H�P�H��DQ t@H���P��DQ u����$��$�����D���A��t��A��t��A��t�� A��t��@A����L��$PL��L����Ņ��H�D$H�D$ fDH�yJƄ$H��� H�D$DŽ$@Ƅ$�ď �ʏ �ď ��� ?B��� ������� H��tH� �L�5f� E1�H�l$(�F@H9-� L�5J� tH�5y� L���i�L�52� L����Hc�A��A�D�<
�{Ic�H�eH��I�4�pMH��u�E����L�5� H�T$HA�D�H�
�DA �SA�\$�Hc�� �A�D�H�S��DA �,H��A�܅��A�E����H�D$HH�A�H��DB tDI��A�H��DB u$�L�%P� E1��D$H��$�DŽ$��D$XM��M�勄$��D$xE�̉D$@H��$H��$�H�D$8H��$H��$�H�D$@��DŽ$�DŽ$��D$P��t#I�N��\��A�/I��I��A�.@�Ņ�u�Hc
$� A�E��L+=�� D�$�H�͋�$�Ic�E��D)��$�D�|$Hc�D�L$`��H�y�A��M�d-�L��H)�Mc�L�L���|�E��t�T$X���!:Hc\$@E1��~H�<����I�ŋT$P�L��L��H��$PA��H��A��DE�D������DE�D������H��DE��U�A�ą��jH��$P�@	��H�
ٌ ��H�=�b�;�L��������H��H�=�^�������A�OA��I�����A�OI��A���B� ���DŽ$�A�OI���}���A�OI���p���H�5
]H���y�D�\$L�T$����H�5�\H���X�D�\$L�T$����*H��H�5�\L�T$ D�\$�-�A��D�\$L�T$ �D$���\$����I�GH��$HD�\$8I�<�L�T$0H�t$ H�D$�?L�T$0D�\$8��$�H��$H�8��7I�H�5G\L�T$8D�\$0H����D�\$0L�T$8��t+H�5#\H��L�T$8D�\$0�w�D�\$0L�T$8����+�[H�D$H�t$ D�\$0L�T$I�<��$?L�T$D�\$0�D$hH��$H�8��8��A���of�H�=�� H�]�1��6���$��$��1�H�D$(H9� ��L�%E� DH�|$(1�L���HH���}L�%!� L���	���~:H�t$HHc�A�T�H��DQ t���H��@A�T�H���DQ t�����u�H�5� �
�$���tL��$PL����H��$�H��t H���IH�5�Z1������ ����L�%t� ���L��DH�|$(H��H���GH��H���c
H�D$(H9� tH�5g� H���W��E��fD��t��UL�}��L���r�f�A�vI�V��@��x�uH�=`c@��Hc�H�>��fDH�
� �+�H�=`R�c���$�HDŽ$��;���E����H�T$HH�
�DA �����A����H)�L��L�I�|
��D�d$`E��t�D$X��tH�=�V���D$X�D$@�L$@�VUUUE1��D$pE�����)‰����$�����$��F@H�|$��H�%^E��H��Pj�L�t$(I�FL��PjD�l$`AUSD�D$8�L$@H�|$P�bHH��0E��L��H��]H��PI�F(j�PjAUSD�D$8�L$@H�|$P�.HH��0A��E���KA����	E���=H�D$ �t$�@�3�p��	%`=@u&�D$�L$��9�~Hc��|
��f�HcD$�=� �p���J
�D$lD$0�s�D$8�t$8����CE1�9���L$0����)D$Hc�HՃD$pD�d$PD�T$hE	�E�����t$X���������$�����H�D$H���hH�H�D$8H��L�t$E��H��I�H��$@L��I�F�D$@PSD�D$�L$ H�|$0�u�I�&�A��XZE�������D�l$`E���Q	��$��L$lL$0�L$8D9���	��$�G�,$E1���A��E��d�f.�H��T�1��}�Jc<�B�t�H�� )�H��=H�5ԅ �
���E��u	E���4I��E9�~1F��D��H�=�� �E��y�H�fT�1�I����E9��E1�L��$E��M��D��$���A��A�� t^E��s�A�M��D��D��H��H���������H�='� A��D��M��H�m[�1�A����A�� u�f���$L��$�L��$L�t$ uC�@H�=Ʉ M��L��1�H�;[��8�L���`�H�M�lA�}�VH��M��M��D��hH��H��L����AYAZA����y�H�=d� L��1�H��Z���뜾<H����H��H���=�H�|$1�H���H��H��H��H�HD$�P�A�DW tH���P�A�DW u��H�|$H�5�U�-�H��H���"H��$H������H���8��$D��$�Lc�L��L�|$X�;H�]� H�پH��I��H�D$ H�T$pL����I9��S1A�H�=Z� ���=ERCP��&H�
;S��ERCP��$D��L�D$H�qL��Lc�1�L��$PL�|$P��H�|$ L��1�1���DH��$PH������� M����"I�0�K:H��L���H�x0H�H�xH�D$���I9��=2H�=�� H�L$H��Y1����H�|$�BH�|$ �B�
fD�D$�l������D�L$`E1�H�D$8E���H��$pD�t$@D��$�E��I��f.�h�E��H��A��AUAVSD�D$(�L$0H�t$8H�|$@��H�� D;|$hu�D��$��<�f�f��H+D$8�H*��YhH�=�� H�O�*L$h���^��^�g�����fD;D$�^�A��b���H�=a� A��D��1�H��W�������E1�L��$HD�t$x�fDA��A�� tfE��s�M��D��D��H��H��������L��$HH�=� A��D��H��W�1�A���X�H��$H�[�A�� u�D��$L��$�L��$HL�t$ uQ�@H�=�� M��1�H��$HH�0W����H��$H��L����H�M�lA�}thM��M��D��H��H��L����A����y�H�=#� L��1�H�vV���H�=� A��D��1�H��V��p����D��$�E�������L��$HD��H��H��L��������E�|$�E1�1�E��
�ADI��H��$HD��H�=� H�eV�N��1����I�EM9�u�Ic�H��H��$HH�<t%H�
F �#�H�=MM��H��$H�+��t$8���������D$0�D$l���fD�D$`����h�E��H��H��$xP�D$PPSD�D$(�L$0H�t$8H�|$@�r�H�� A���E���U���A��������H�
�~ �
�H�=rU����|$@~6H�
m~ ��H�=<M���Hc�sH�J~ )�H�|�6H�57~ �
�U���D��$�E��L�|$ D�l$@AUL��E��H��SD�D$A���L$ H�t$(��_AXD;t$hu����fDL��$H�L���+���$H��
�k�����
�b���=

�W�������(�������E�������H�
�} ��H�=sS���JcD��t$H�[} )�H�|�'5H�5H} �
�f��}�����D$@E��H��PSD�D$�L$ H�t$(H�|$0��A^A��XE����E���V�����$������|$��H�L��9����������������<�t��H����DD�D$@D��1�H�=�| H�kJ��D�d$8E�����|$0H�=t| �H��R�1����g����D$8D��$��D$0�D$l�����0����H�D$H���H�

| �H�=$NH��$���g���$�������D��$��D$0�D$l���H�=�{ ���1�H�pR�(��x�|
�H�q�V�Hc
�z H�B{ H������D��1�L�'{ H�=X{ H�Q�������0����H�D$H����H�
 { �)�H�=�H���D��$��R�����$�E1��H��u�"�DI��I9���G�\�E��x�H�=�z D��1�H��I�4��KcD�C�t�H��z )�HRz H���b2H�5�z �
���M��u�@��t�H�
hz ��H�=ZP����IcE�t$H�Cz )�H�y H���
2H�5+z �
�I���<���H�
z �!�H�=�G�w��D��$����H�D$(H9^y ��1�H�|$(t
H�|$(�T��H�=�y H��tH;=!y t�:��H�=sy �~��H�=_y �r��H�=Ky �f��H��$��Y����H��$dH3%(�!&H��([]A\A]A^A_�A������|$p�)�H�
:y �	�H�=)P��������H�=y �H��JH��1�����<�D�勄$��$���(H�|$ H��t�8H�D$H�����H����8���H�=�x D��1�H��O�"���}����@�DŽ$�����H�L$H�=qx H�~O1������H��������H�D$HA�~{H�H�D$`�vH�\$pI��1�L��$�D��$�I��E�,$H�D$`B�h��t,������H�B,���f%f���I����'��0)�M9�u�L��H�\$pD��$�L��$����H�ѽ��H�ѽ	��H�ѽ
��H�D$HA�N1�H�0H��DNt�L�H���lH����DNu�H�|$�-H�D$Hc�I��H�H�h�V�H�ѽ
�?�H�ѽ�2�H�ѽ�%�H�ѽ��H�ѽ���L$PI�����L$PI������v I������L$PI�����H�D$HA�N1�H�0H��DNt�L�H���lH����DNu�H�|$��H�D$Hc�I��H�H�h(�}��L$PI�։�%����%��	��L$P�U�H�D$HA�v�D$@I��1�H�H��Dq��#I�����TP�A���Dqu�T$@9�$�}CH��$����Hc\$@H�<��{��H��$�H���0"�D$@H��$���$��|$@�HD����D$P����%����I��
�D$P��I���D$X�~�DŽ$�I���k�H�D$HH�0A�F�F������QI���A��L$PI���1���$��$��{I��A���H�D$HA�vH�H���4pf���@���W��+�,��-�)"��!��!��*�y���DŽ$@A�~-�f!I����k�
M�ƍL1Љ�$@A�6��$@M�F@���Dxu�DŽ$��щ�$@�\��L$P�I���L��L$PI���?��L$P I���/�H�D$HA�vI�֋T$H�H��Dq�
�I�����TP�A���Dqu�T$���H�5�s H��H�T$`��5�����	D$PH�T$`I��I��A�~�>u���H�D$H�~�I��H��H�0�L�A��DVt�PȀ�v�|��L��M9�uۃ=8s t�����!���V�I���X���$��&H�|$ L��1��N��H��$0H�D$H����H�=	s H��E�1��~��H�D$P�4��H�T$ ��r �H�=�r �H��B�R��H�|<)�1��;������H����I�H�5{CL�T$ D�\$H������D�\$L�T$ �����[�D$h �A������$���DD$h��$����H�5Ar �
1��]��H�|$(�]���H�D$(H9�q �K����<���Hc
Wq H��q L����2��L��q ��1�H�=�q H�QD�1��]��H�D$(H9Aq �?����H�5�q H����3�����A	��L��L�}A��>u��M�������D��$�H�=rq 1�H��CH��$0�����H�D$(H9�p ����X�DŽ$���H�\$ H��$8�1�H����2�1�H��H��$��2�1�H��H��$�2�1�H��H��$�2�1�H��H��$�2�1�H��H��$$�y2�1�H��H��$(�b2�	1�H��H��$H�K2�1�H��H��$�42�
1�H��H��$�2�1�H��H��$ �2H��$H��$H���^����$A�����i9���D��$��$A9���H��$@D��$A9���H��$8H�D$pH9��t��$H�=�o H��:1�������$��~H�=�o H�vC�1������$(��L�Zo ��$����$ ��H��$@H����H�
�@�L�)?LD�H�=(?�� HD�H�5)?��HD�H�!?��HD�H�$?��HD�L� ?�@LD�� L�D$L�?M��L�?LD�M�ũL�?LD��M��L�
?LD�M��L�?�LD�M���LD�L��>�LD��L�L$L�
�>L�D$@LD�L��>�ASLD��WH��>L��VHEȾ1�RH��9S�t$0ATAUAVAW�t$`��$����H��`��$�7H��$@L��m %pH=0����H=@��H=Pu L�Ѻ!�H�=�9���L��m ��$����@��������L�D?H� >LE��=?m �kH�\$HHc�H��Bf�������H��A�L��1������$H�=m ��������L��>H��=LE��=�l ��H�\$HHc�H��Bf�������_H��A�1��A���|$8����H�|$��L�|$H�\$ H��$,�HDŽ$PL��$PL��H���D.��$,�1�H�=Wl H��8����L��L��H���.H�
1l H��$P����H�=4A������H=��H= �Y���L�Ѻ�H�=C@�N��L��k �4���H�
�k �C�H�=�5�%��H�\$ ��k L�-�k D�K�;ERCP�D$�S�K�CD��tf��f���f������D��A��Hc�H�l$xH�H�HD$ I��H�D$`������D$@D�D$E����L��H�:L��1�H+L$`����A�<$@��t�2H� GHc�H�>��|$�AA�L$�L��1�H��9f�����H��A�$H�\b L��H�<�1����A�$H��PL��
��H�I����D�D$E���H���L���H�=J9�����H���A�L$�L��1�H��=H� Pf�������A�$�1�M�<A�<F�k���<K�v<M�[���A�OA�Gf��f����D��f���{H��=�L��1��Q��A�?M��A�H��O������L���H�=O=���A�<$P��M�|$1�E1���$�L��$�D���D�cD�����H�A������s�����~߃�-�+��]�"�C�^��L����:��D�u�A9�~=E9�t
L��-���A��-�IA��]�?��!��^�L��D������A�^�������A���H�A��؃���s��:���A�D$H�9` �L��H��;H��1�1����A�D$��<��
L��{���A�<$A��A�L$�L��1�H�<f�������A�$<@�<<E�n����A�L$�A�^��H�w;�L��1��o��A�<$4tL���H�= ;�?��A�L$�L��1�H��;f�����,��A�$<3��1�<8���L��+�������L���H�=Q6�����T$@I�t$L���L��{���j��A�<$'��A�L$�L��1�H��:f�������A�$<&�'<+�Y����m���L���H�=�5�Y��A�<$8�A�D$H�q^ L��H�<�1����A�D$��<��H�
�DA�|$A�t$��H�����H�������D��D9�u�D��D9�u�H�Hk��H�zGH��L��1��H��9����zA�L$�A�^�:H�t9�L��1�����B���A�L$H�S9�A�^��H�J9��L���L��H�=�8�9���l$@H����L��H���t��H�HÀ;t�L��
I������P���E�D$H�
T8H�S8fA��E���L��1�1��������L���1�H�=�8������A�D$H�W8f����f����
�L��1�1�����]���A�D$H��7f����f��u�L���1�H�=�7�F���#���A�L$Hc�H�a\ H��7L��f���ɾL��1�1��������|$�{L���H�=\3����E�D$A�$H�\ H�57fA��H��E������1����E�L$�L��1�E�D$QH��7H�
�7A�D$fA��E��f����P1����^_�K���E�D$A�t$��H��D����H�������
�
9�u��JA9�u�H�H��AHk�D�H��DI�Hc�H�2[ H�371�H���L��1��������L��1�H�l$xH�
6H�	6����L��C�H�=;-����2��L���L��H�=�1����l$@H����L��H�����H�HÀ;t��G���A�L$L��5H��5�&���Hc�H�kZ H�61�H���L��1��/�����L���H�=x1�����������E1�L��$0H�D$ H��$�H�D$H�L$L��D��L��L��$�����H��tH�����A��D;�$�u����f�f��H+D$ �H*��YmHH�=b H��+�*�$����^��^HH�c���������8�r��H�L$�H�=�a I��H��31��0�������L$PI�����H�4Y L��H�<��x��A�H�]G������H�D$H����H�@�H�D$P����/���8����H�=9a H��H��6I���1��������DŽ$��a��A�L$�L��1�H��/f�����p���v���H�D4�L��1��U�������T$@I�t$L�����A�$H�QX L��H�<������L���H�=�3����C���H��3�L��1������x���L���H�=�3������H�=:` A���1�H�!+����j�H�
` ��H�=2�z���D$8H�D$H���q����$�HDŽ$�DŽ$��D$��$��D$0�D$�D$l���H�5�0H������D�\$L�T$ ����H�5e0H��L�T$ D�\$���D�\$L�T$ ��u�|$��
H�560H��L�T$D�\$���D�\$L�T$���DŽ$��o��H�=_ H��1�H��4���H�D$H������L�|$PH�xH�پL���M��I9��a��H�=�^ H��1�H�a4�7���k��H�D$P������^ I�����H�L$H��'�1����H�������H�/2�L��������H�=D^ H��(�1�����a�H�=%^ H��(�1�����)�H�=^ H�O(�1��{������A�^����L�Ѻ�H�=o2�?����1��k�
H��A�L�D�H�zB�D@u�I����	�$����H�=�*H�T$`蔿��H�T$`I�����A�|$A�t$��H�m>���H������q�
9�u��J9�u�H�H�5;Hk��H�F>H��L��1��H�T0�����L��$���$�I�� A�<$P��A�<$O�L��H��.H�
�,HE�H��01��8�����0H�T$`�D���H�T$`H�D$H���H�
R.�B��H�L$I���	�U�H��I��A�6I�~H�iH��pu�H�t$H�|$ f��6�������	H�l$�A���0H�T$`�ſ��H�T$`H�D$H���L��D�uL�}E������A����A��tF�L��L����hHc�L�A��u�L��-�޾��H��L���n��H�H���uL�u�}H��<���f.���H������6	�
9�u��J9�u�H�H�X9Hk��H�i<H�H��.�L��1�L��譿�������uL�u�}H�#<�����H��������
9�u��J9�u�H�H��8Hk��H��;H�H��.�D��$�I��1�I��L��$�I��L��$��'�a���f����L�L$pH�B,�f�����'��0)�E�iH�\$`I�AH�D$pB�kL���u�L��L��L��$�D��$�L��$�<}����=Z �{	����D	I�I����r���I��H�|$ L��1�������;�$�u�I���L���L)�H��M��tL��詻��f�f��H�=�Y H��#�H*þ��Y@�*�$��^��^�?�����P�H�H�5$+���T����>�H�=`Y H��H��+�Խ����$��$����DŽ$�DŽ$�DŽ$����DŽ$�DŽ$�DŽ$�DŽ$�-DŽ$��D$h�D$DŽ$�DŽ$�DŽ$�隿��I�$��H�5:*�t������H�
�X ��H�=�,H���޼���`H��$H1�H��L�
#*H�VH���P�L�׾f����P��$4D�D�H��1�H�M,螼��Hc�$4H�$XXZ��$(L��W �P���$(�����H�D$8I���	�U�H��I��A�I�~H�hH��Nu�H�t$8H�|$ f�蟻�����/H�l$8���1��k�
H��A�L�D�H�zB�DFu���	D$x�2�L���1�H�=g*�û����L�Ѻ'�H�=j#襻����L�Ѻ�H�=_+臻��L�W �m�L�Ѻ�H�=t+�b���L��V �H�L�Ѻ�H�=�"�=���L��V �#�H�
�V ��H�=q"������L�Ѻ�H�=�*����{�L�Ѻ!�H�=�!�غ��L�YV ��L�Ѻ�H�=�!賺��L�4V ���L��%���H��$�1�L��蓺�����L��\�!������L��?1��
����:�L��?����(�A�\$A�D$M�|$f�����ۃ����@���������H�=�U H�m)�1��	�����H�=�*��1��ܹ��L�]U A�L�=�*L���GH�D$HH��DX@tvH�� tpD��H��*1�A���褹��H�=U H��H������H��$PA��A���H���؃���s�A��K4��T ��u�E�D$�A��^v�D��L���1��<���A��H�=�T �H���H�=�)�����A�H�=�T �H���
蚷�����H�b)�1�������A�^���s�H���
�H�=
)覸���p�H��(�L��1�蛸�����L��^�)����S���H�
�S ��H�=�(�W������H�
�#����A������D��H�:"L��1���1������L��\迶��������H�5Z�]����;�H�L$8H�=dS H�e 1���ٷ��H�l$8�\���
���H�={!�N�������I��������S I��1�1��k�
�I�֍t�A�I�VD��B�D@u�@��t�5�R ��R ��!����1�1��k�
�L��A�L	�D�
L�BB�DHu�@���m���
�R �b��H�R I�����H��$HH�D$ I�GH�D$�N��H�
"���H�L$H�=OR H�P1�����H�l$�G��H�
�!���1ۃ�$��+�����H�L$H�=R 1�H�)��z���H�|$ �����d���D$h ��r��H�
�!��L��?�����K�|�H��$HL�T$��L�T$D�\$��H��$H�8tFI��*�L��$H�L$`L����1�H�L$`�p��A�,H��I�A�,9��I���K��L��$P�L�T$L��D�\$蹵����L�����H�H��$P�ʹ��D�\$L�T$���%����¿H�5
1��ƴ����<���H�=�P ��1�H�EL�L$`�;���L�L$`���L��$��L�L$`L���1�L�L$`�H��A�,H��I�A�,9��I���Q���H�L$H�=]P H�H"1���Ҵ���D$8�L�L��$PL���E����G��H��H�5
!I���^����Å�t6H�5�!L���I�����tH�5�!L���6������1��1������M���H�5� �H��$@H��1�L�-`蘳��H�=� L�%u!赱��1�H�������$@L��H�5� IEԿ1��Y���H���̲����$@L��H�5�IEԿ1��*���H��蝲����$@H����
t;H����
t/H��=

t!H�����t��H��H��HE�H�5 �1�輲��H��L��$P�'�����$@H��H�kHD�H�5��1��z���H�������$@H�5�1���S���H���Ʊ����$@H�5�1���,���L��蟱��H��$PH�5�1������L���w���H��$PH�5�1���ܱ��H���O�����$@H��H��HD�H�5J�1�袱�����L���1��H�5W肱���}�ɴ�����1�I��^H��H���PTL�H�
�H�=H����rL �H�=�L H��L H9�tH�NL H��t	�����H�=qL H�5jL H)�H��H��H��?H�H�tH�%L H��t��fD�����=�L u+UH�=L H��tH�=ND �y����d����eL ]������w���������������������������A��A��E����A��Ic�E�H�H�
�1H�I��D��I�H����H������?�ɀ�JI9�u�A��Mc�L�D	�@�>øE1���A�렸A�듸A��ATI��UH��S�螰��H���u�DH���]��t�DZ u��]1��D��H���DC��]��DJu�I�,$[]A\�@f.�ATA��UH��SH������A�t$�1�H��f�L���DD���B+�u	L�BH9�u�[]A\�f�f.�H��H�=�� ���H�=����H�=5����H�=a���H�=U��H�=y���H�=��ج��H�=��̬��H�=���H�=!贬��H�=�訬��H�=�蜬��H�=萬��H�=A脬��H�=��x���H�=��l���H�=�H���\���f�f.�USH��D�����D��D����������?H�R/�H��/�RD#�H���A���tP�F�ƒ�����E1Һ�"��DH��A��A��A����~����?����A	�A9�u�D��A����H�
�1������#f�A�@�^v'D��H��1�1۾�߭��H����[]�fDH��D��1��c���H����[]�1�D��H��1���蠭��H����[]��H���1��}���뜐f.���UH��SH���~���H�Ë�H ��uH��H��[]��H�=�H I�؉�1�H�:��!���H��H��[]����USH��H������H�?H H�ŋBH ��u
H��H��[]�H�=QH I���1�H������H��H��[]��AWAVI��AUATI��US1�H��f.��n���~bE�>�=�G E��D��������G I�����説��E��H�B�Pf������t3M��tL��D���Ӫ������n����H����[]A\A]A^A_�fDM��tD��H��L��1���������X����A�G�^���@D��������fD������A�����u����uD��F I�����A�^�����M���F���L��������7���f.������Hc�H�=N,�RD��A�Fɉ�E!���A��@���������z�M��L��A�@I��A��A��A������������?��A	�L9�u�A���{���A�����A������A������1�A���������9��=�����9��2���)�Hc�D�ɉ���@�L$�ת��HcT$H�H���Pf���������M��tH���L��1��N�����������뇸뀸�v���f�f.���AWAVAUATU1�SH��H���oE ��WE t-H�-jE ���H��tH���H�=輩���sH�{H���
���Hc{�s H��A��)�H{����sH�{1�A�����Hc{ �sH��A��)�H{����H��t
H��
�
����KH�=�D ����D��D ���VH���1��3���1�E��~@H�5�D � ��货��A9�u�H�5�D �^螧��E��~;E�~�1�A��tDH�5YD � ���t���D9�u�H�5@D �^�^���E)�1�E)�E�eA���|@H�5D � ���4���D9�u�K<�LcC8H��H�=�C ���D�L�C 1��]���H�5�C �
���H�C0��C H��t
�(���g1��C 9Cu��C 1�;yC �{C @��H����[]A\A]A^A_�fDH����H�=>�ǧ�����f��OD�G(H�1��H��E1�E1�譧���K$L�-���_���f.�L��H��1��~���H�CH��Jc< B�t )�H{���H��
����C$A��I���D9��[���H�CD���B� ��y�H�o�H��1��������K81��H�Z����{8c�����H�
aB ��H�=9�æ�����fDH�=9B ��1�H�"謦����������A SH����uH��[�գ��DH��H�=�A �1�H���c���H��[骣��f.�AWI��AVAUATUSH��H��8L�5uA H�|$H�T$(��@ DL��A��L)�A)�A���~[H�D$H9�@ �)H�T$D��L��虤��H���0L������HcЅ�~A�|�
�I�L�5�@ �y@ ���T$ Lc�D$L��躤��L��I��诤��L��I��H�D$蟤��M����M��I�������M��T$ ��Lc�L��L��L)�L��L��4���L�m@ L��L��M�<,L��L�\$ �����D$L��M���? �a���H�=B@ �U���L�\$ L���H���H�L$L�%,@ L�-@ �T$H�
@ ���DH�t$(�1��������f.�L9�HD�H��8H��[]A\A]A^A_�H�=�? �L$�1�H��y�����O���Df.�����? SH����uH��[镡��DH��H�=�? �1�H���#���H��[�j���f.�AWAVAUA�@ATI��U�����S1�H��(HcD$pH	H�<$L�|$`H�T$D�t$h�L$D�D$D�L$f�H�L$xIc�L��H�AVAWD�L$,D�D$(�L$$H�T$H�|$����ZY;�$�t.���}���ul�SD9���B�+D������A����E�D9�t ��~,A�T-D������A���s���D��A���e���DD��E��U���DA���D$p��H�I!$H��(D��[]A\A]A^A_��H��$�H�=Q> �$E��H��
�1����D�$�f.�S���x�����x[�f�H�=> A�؉�[H��1�逢��H��H��H����H	��@f.�U�H��H�52
SH��H���S������H�5
H���2� �����H�5�H����0�����H�5�H�����P��tr�H�5�H�����@��tU�H�5�H�������t8�H�5�H������tH�~H��H��1���g���1�H����[]�f.�f���AWI��AVI��AUA��ATL�%$4 UH�-$4 SL)�H�����H��t1��L��L��D��A��H��H9�u�H��[]A\A]A^A_Ðf.������H��H���Usage:     pcretest [options] [<input file> [<output file>]]
Input and output default to stdin and stdout.This version of pcretest is not linked with readline().  -b       show compiled code (bytecode)  -C       show PCRE compile-time options and exit  -d       debug: show compiled code and information (-b and -i)  -dfa     force DFA matching for all subjects  -help    show usage information  -i       show information about compiled patterns
  -M       find MATCH_LIMIT minimum for each subject
  -m       output memory used information
  -o <n>   set size of offsets vector to <n>  -p       use POSIX interface  -q       quiet: do not output PCRE version number at start  -S <n>   set stack size to <n> megabytes  -s       output store (memory) used information
  -t       time compilation and execution  -t <n>   time compilation and execution, repeating <n> times  -tm      time execution (matching) only  -tm <n>  time execution (matching) only, repeating <n> timesCallout %d: last capture = %d
Error %d from pcre_fullinfo(%d)
PCRE: setrlimit() failed with error %d
  %sUnicode properties support
  POSIX malloc threshold = %d
  Default recursion depth limit = %ld
** Unknown or malformed option %s
** Failed to get %d bytes of memory for offsets vector
Data in %s is not a compiled PCRE regex
Compiled regex%s loaded from %s
** Delimiter must not be alphanumeric or \
Compile time %.4f milliseconds
Memory allocation (code space): %d
  Study time %.4f milliseconds
------------------------------------------------------------------
Count disagreement: pcre_fullinfo=%d pcre_info=%d
First char disagreement: pcre_fullinfo=%d pcre_info=%d
Options disagreement: pcre_fullinfo=%ld pcre_info=%d
Size disagreement: pcre_fullinfo=%d call to malloc for %d
Capturing subpattern count = %d
Partial matching not supported
Contains explicit CR or LF match
Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s
Duplicate name status changes
Forced newline sequence: CRLF
Forced newline sequence: ANYCRLF
First char at start or follows newline
Subject length lower bound = %d
** Character \x{%x} is greater than 255 and UTF-8 mode is not enabled.
** Truncation will probably give the wrong result.
no parentheses with name "%s"
** Can't use dfa matching in POSIX mode: \D ignoredExecute time %.4f milliseconds
Matched, but too many subsidiary matches
Matched, but too many substrings
** PCRE error: returned count %d is too big for offset size %d
string list not terminated by NULL
**Match limit not relevant for DFA matching: ignored
Options:\x%02x\X{%x}\x{%x}stack_malloc %3d %p
malloc       %3d %p
\x{%02x}--->%2d: <unset>
%2d: %+3d 
    %3d %.*sCallout data = %d
stack_free       %p
pcretest: malloc(%d) failed
free             %p
Minimum %s limit = %d
cr>any>bsr_anycrlf>bsr_unicode>Unknown newline type at: <%s
No CRANYCRLFANY???CR, LF, or CRLF onlyall Unicode newlinesstackheap (byte-inverted) (neg) dupnames no_utf8_check utf8 no_auto_capture ungreedy extra dollar_endonly bsr_unicode bsr_anycrlf dotall firstline multiline extended caseless anchored (caseless)-s-m-q-b-i-d-M-dfa-o-t-tm-S-p-CPCRE version %s
Compiled with  %sUTF-8 support
  Newline sequence is %s
  \R matches %s
  Internal link size = %d
  Default match limit = %ld
  Match recursion uses %s
--helprb** Failed to open %s
wbPCRE version %s

  re> Failed to open %s: %s
Study data loaded from %s
No study data
    > ** Unexpected EOF
** Failed to set locale "%s"
JS>** Unknown option '%c'
Failed: POSIX code %d: %s
Failed: %s at offset %d
Failed to study: %s
End    %s
Opt %.2x %s NC Close    %s %d%3d %s    Cond recurse any    Cond recurse %d    Cond nrecurse any    Cond nrecurse %d    Cond def %s 0,    %s    [^%c]    [^\x%02x]    [^%c]{    [^\x%02x]{    \%dCallout    %s %d %d %d    %s %s    [\p{%s}\P{%s}]%s{%d,}{%d,%d}Error %d from pcre_info()
Max back reference = %d
Named capturing subpatterns:
  %s %*s%3d
No options
Forced newline sequence: CR
Forced newline sequence: LF
Forced newline sequence: ANY
No first char
First char = '%c'%s
First char = %d%s
No need char
Need char = '%c'%s
Need char = %d%s
Study returned NULL
No set of starting bytes
Starting byte set: 
  %c \x%02x Unable to open %s: %s
Write error on %s: %s
Compiled regex written to %s
Study data written to %s
data> No match: POSIX code %d: %s
Matched with REG_NOSUB
 0+ match()match() recursion** /%c loop abandoned
copy substring %d failed %d
%2dC %s (%d)
copy substring %s failed %d
  C %s (%d) %s
get substring %d failed %d
%2dG %s (%d)
  G %s (%d) %s
get substring list failed %d
%2dL %s
Partial matchNo match
Error %d
Failed to read data from %s
\A\G\K\B\b\D\d\S\s\W\wAllAnyAnybytenotprop\R\H\h\V\vextuni\Z\z^$charcharnc**?+?*+++?+nclassxclassRefRecurseAltKetKetRmaxKetRminAssertAssert notAssertBAssertB notReverseOnceSBraSCBraSCondCond refCond nrefCond recCond nrecBrazeroBraminzero*PRUNE*SKIP*THEN*COMMIT*FAIL*ACCEPTSkip zero+�����������+���������������������������������������������������������������������������+��������������������������������������������������������������������������������������������������������������š������������������s���^���J���-��������������ݕ��ɕ������k���T���@�������(������������������������������������ה����������������������������������������������Ô��������������������������������������������������{���������������������q��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������u���°�������������f���������������S���C��� ���|���T���������ܮ�����������������̮�������������������������������������������������������������������{���������%����������������������k���o����������������������������������������������������������������������������������������ֽ��޼��޼��޼��޼��޼��޼��]���]���]���޼��޼��޼��]���������������������������ɻ��ɻ��ɻ��������������ɻ��޼��޼��޼��޼��޼��޼��:���:���:���޼��޼��޼��:���������������������������/���/���/�����������������������������������G�������G�����������վ����������������������������������N���������������M=%N+3<DMSUiBpsvC{���	�
��>���
��O����%-4=PNXQnR�S�T���D����� �!�E�"�#�U�����	�FG	$
V&)%3&;=
@'L?PSV(\He)p*|W�X�+�,������@�A����I�-��Y�J��./	K0)10283A4HZQ[Z5`6g7n8s9{:�;�L�<����AnyArabicArmenianAvestanBalineseBamumBengaliBopomofoBrailleBugineseBuhidCCanadian_AboriginalCarianCcCfChamCherokeeCnCoCommonCopticCsCuneiformCypriotCyrillicDeseretDevanagariEgyptian_HieroglyphsEthiopicGeorgianGlagoliticGothicGreekGujaratiGurmukhiHanHangulHanunooHebrewHiraganaImperial_AramaicInheritedInscriptional_PahlaviInscriptional_ParthianJavaneseKaithiKannadaKatakanaKayah_LiKharoshthiKhmerLL&LaoLatinLepchaLimbuLinear_BLisuLlLmLoLtLuLycianLydianMMalayalamMcMeMeetei_MayekMnMongolianMyanmarNNdNew_Tai_LueNkoNlNoOghamOl_ChikiOld_ItalicOld_PersianOld_South_ArabianOld_TurkicOriyaOsmanyaPPcPdPePfPhags_PaPhoenicianPiPoPsRejangRunicSSamaritanSaurashtraScShavianSinhalaSkSmSoSundaneseSyloti_NagriSyriacTagalogTagbanwaTai_LeTai_ThamTai_VietTamilTeluguThaanaThaiTibetanTifinaghUgariticVaiYiZZlZpZs�����������������!!@�@��.A;�x���H{��x~��8������0���D(��px���X���������,h��`����x���������d���h��������������8���zRx�(��/D$4w��@FJw�?:*3$"\(z��0t����(�l��bB�D�D �TAB(����DB�D�D �vAB�����D�@����EA�A�D �
CAGQ
CAA^
CAH0<���YE�D�D V
DAHcDA0p���YE�A�G ]
DAAcDAH����B�B�E �B(�D0�A8�FPw
8C0A(B BBBGH�D��yF�B�B �B(�A0�C8�G@
8C0A(B BBBG<x��FK�K
JaH\����B�E�B �B(�A0�A8�Gp�
8D0A(B BBBA�L��FK�K
JaT�|��VB�B�B �H(�D0�F8�F`BhBp^hA`�
8D0A(B BBBH ���0A�L
CM@���$T���A�P�G �CAT|8z���MF�B�B �B(�A0�A8�G� L�4��4B�4M�4B�5G�5A�5W�4Q�4F�4A�4B�5B�5A�5W�41�4A�4[�4A�4��4N�4P�4B�4_�4L�4B�4A�5\�4�4O�4E�4A�5\�4��4J�4X�4B�4��4A�4Z�4D�4�
8A0A(B BBBA�4G�4K�4L�5H�5D�5B�5B�5B�5B�5D�5G�5I�4�	�4e�4H�4A�4��4S�4n�4A�4D�0��eF�E�E �E(�H0�H8�G@n8A0A(B BBBX��t�s@� q����������������
�
����!�$�'�*�-�0�7�:�}�=�?�A�F���M�O�V�R�P�w�7�7�7�U�X�[�7�M�O�V�R�P�w�7�7�7�U�X�[�7�M�O�V�R�P�w�7�7�7�U�X�[�7�M�O�V�R�P�w�7�7�_�^�e�l�p�P�x�|�����������������ʘɘϘØȘΘԘݘ�������
��� �(�.���6���n
h�0� 8� ���o��	H
�� �HH	���o���o�
���o�o
���ov�� @P`p��������    0 @ P ` p � � � � � � � � !! !0!@!P!`!p!�!�!�!�!�!�!�!�!"" "0"@"P"`"P�
GA$3h864(s(s
GA$3h864�%�%GA$3a10s_sGA$3a1_s_s
GA$3p864_s_sGA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFY�GA*GA!GA*GA!stack_realignGA$3a1_s_sGA$3a1&GA$3a1h�p�GA$3a1`st
GA$3p864 t�GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA*FORTIFYGA+GLIBCXX_ASSERTIONS
GA$3h864(s(s
GA$3h864�%�%GA*�%(sGA!stack_realignGA!stack_clashGA*cf_protectionGA*
GA*GOW�DGA!
GA$3p864�e�GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protection
GA*FORTIFYGA+GLIBCXX_ASSERTIONSGA*GA!GA*GA!stack_realign
GA$3h864(s(s
GA$3h864�%�%GA$3a1e�e�GA$3a1e�e�GA$3a1&+GA$3a1p�u�pcretest-8.02-9.1.el8.x86_64.debug-��T�7zXZ�ִF!t/���2]?�E�h=��ڊ�2N���`�x<�\���+���5�b�3�IK�U�5~2N��Yg��z�G�-O+��և���eN?p�c��P`��)��EY����������s��dž���S�G|�!J�E�|^e�9�Ri�O�t�k���:�Syo%`�W}vw�Z؂X��@��X�ƹ��k��8;�6�m�a|p�&� U�z�Dž�uK�@!��=g�#�`��?V��z����drQ�$<���s�Z��XS?eg/.��A���ϳ	��8�}�dU��8a�̄ġM퇽/}�o����&,��Ʊ+Ph����<�s�o6�)��@�6�b�E��q�l�+��ܩ�619��,�C�!�WOL��!G��J�[t5�*t��3���>͹��}��h����~f��n<r4�_]�
�V���$%�&dDo����`?�C��v��9��x�5vz�4W���,��c�����ժ��}�9ԍ�]~�|�lN��'��9��:,C��~u�|��z�:��X�h(O>\�6�Hk�����$�ū�:�/��+��c+��h�4��f����!q+�����Ք{�N2�P��T����&�='�2��
�U�d�o��&�x�%f[-���3\�Qa�OT�����(�[BO��۳�~�鼥�~���.�s��a}r��2Bft�����ec>����T��!�c�?v!X�kTk"��C��������B�3����|$��JI���5����zv�a?�r����\�z������N�0�=��J�p�c��^�V(2���;GNa��_�N�J���K�V�|4�xɏ��K:T,�-��`��n�Ȗ�>.溧�#]��|����[��ս�@���W)k����^ј܍7��Ip�s��&:=8���H�`�D�q<�^x
�*v��K���.��R����X�׃�RX�F��b�	?��ixb��b�rGJW�=��G�I���MD�%�~���m!���zoJ0�����햃�T�-,��Ջ�=%e(��\������	�#�9�Â�c��#d����g�YZ.shstrtab.interp.note.gnu.property.note.ABI-tag.note.gnu.build-id.gnu.hash.dynsym.dynstr.gnu.version.gnu.version_r.rela.dyn.rela.plt.init.plt.sec.text.fini.rodata.eh_frame_hdr.eh_frame.init_array.fini_array.data.rel.ro.dynamic.got.data.bss.gnu.build.attributes.gnu_debuglink.gnu_debugdatapp�� &�� 4��$G���o��PQHHHY�	�	�a���o

�n���o�
�
`}H�BHH���00@�p"p"0��%�%�^�h�h�
������" �(�(�����0�0� 0��8� 8��@� @�� ��� �� �� ���� � � �� 
��`�X `�(/��t��>usr/bin/pcre-config000075500000003173150403561430010254 0ustar00#!/bin/sh

prefix=/opt/alt/pcre802/usr
exec_prefix=/opt/alt/pcre802/usr
exec_prefix_set=no

if test yes = yes ; then
  usage="Usage: pcre-config [--prefix] [--exec-prefix] [--version] [--libs] [--libs-posix] [--libs-cpp] [--cflags] [--cflags-posix]"
else
  usage="Usage: pcre-config [--prefix] [--exec-prefix] [--version] [--libs] [--libs-posix] [--cflags] [--cflags-posix]"
fi

if test $# -eq 0; then
      echo "${usage}" 1>&2
      exit 1
fi

libR=
case `uname -s` in
  *SunOS*)
  libR=" -R/opt/alt/pcre802/usr/lib64"
  ;;
  *BSD*)
  libR=" -Wl,-R/opt/alt/pcre802/usr/lib64"
  ;;
esac

while test $# -gt 0; do
  case "$1" in
  -*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
  *) optarg= ;;
  esac

  case $1 in
    --prefix=*)
      prefix=$optarg
      if test $exec_prefix_set = no ; then
        exec_prefix=$optarg
      fi
      ;;
    --prefix)
      echo $prefix
      ;;
    --exec-prefix=*)
      exec_prefix=$optarg
      exec_prefix_set=yes
      ;;
    --exec-prefix)
      echo $exec_prefix
      ;;
    --version)
      echo 8.02
      ;;
    --cflags | --cflags-posix)
      if test /opt/alt/pcre802/usr/include != /usr/include ; then
        includes=-I/opt/alt/pcre802/usr/include
      fi
      echo $includes 
      ;;
    --libs-posix)
      echo -L/opt/alt/pcre802/usr/lib64$libR -lpcreposix -lpcre
      ;;
    --libs)
      echo -L/opt/alt/pcre802/usr/lib64$libR -lpcre
      ;;
    --libs-cpp)
      if test yes = yes ; then
        echo -L/opt/alt/pcre802/usr/lib64$libR -lpcrecpp -lpcre
      else
        echo "${usage}" 1>&2
      fi
      ;;
    *)
      echo "${usage}" 1>&2
      exit 1
      ;;
  esac
  shift
done
usr/lib64/libpcreposix.so.0.0.0000075500000027170150403561430012010 0ustar00ELF>@x'@8@�� �� � `x PP P $$���  P�td���<<Q�tdR�td�� � `pGNU��]��p�]~|(���V��I_�@ !�BE���|�H>�G>�qX�e�oI>�e ��^�� �, �F"m�    ��
|����  U��{�__gmon_start___ITM_deregisterTMCloneTable_ITM_registerTMCloneTable__cxa_finalizeregerrorstrlenstrncpy__sprintf_chkregfreepcre_freeregcomppcre_compile2pcre_info__stack_chk_failregexecpcre_execmalloclibpcre.so.0libc.so.6_edata__bss_start_endlibpcreposix.so.0GLIBC_2.3.4GLIBC_2.4GLIBC_2.2.5/opt/alt/pcre802/usr/lib64�ti	ii
$ui	.� �� �� � � ^� h� w� �� �� �� �� � � � � �  ( 0 /8 D@ WH d� � 	� � � 
x � � � � � � � 
� 
� ��H��H�! H��t��H����5� �%� ��h�������h��������h�������h�������h�������h�������h�������h��q������h��a������h	��Q�������%� D���%� D���%� D���%� D���%� D���%� D���%� D���%� D���%� D���%� DH�=� H�� H9�tH�� H��t	�����H�=� H�5� H)�H��H��H��?H�H�tH�m H��t��fD�����=u u+UH�=R H��tH�=� �I����d����M ]������w������AVI��AUATI��UH��S��~Hc�H�� L�,�L�����H�XM��tGI�T$���t=H��H��tH�XH9�v[H�U�L��L���L���A�D.�H��[]A\A]A^�f.�H��u�H��[]A\A]A^����L�-��DH��M��L���RL�
�H�����1�H�
��T���XH��Z[]A\A]A^�fD��H� H�?H���f.���SH��H���փ�H�� dH�%(H�D$1���H�L$L�D$����E����� E�����@E�����H�T$E�E1��Y���HcT$H�H�SH��t01�1�H���,���H�H�C1�H�L$dH3%(u'H�� [�fDHcT$���Aw�H�"�����������AWAVI�ֺ�AUATUH��SD��H��L�dH�%(H��$�1�E�bH�G����A������Eډڀ�A��Eډڀ�A��E�H�����u	M����1�E1�E1�E1�A����H���T$L�T$H�t$�O����T$L�T$H�t$��RA��H��E1�AW1�L���I���ZY����E��uaM���qL��1�@A�߉T�A�T��T�H��H9�r�E���-I9�v'H�D�J�T�fD�@����H���@�����H9�u�1��f�HcE�MH�)��`����I��
��I�����
��K�<vD�D$H��H�t$L�T$���I��H����C�vA�L�T$H�t$D�D$������E��u]�P���
w��H����H��$�dH3%(uhH�Ĩ[]A\A]A^A_��C�vE1�L�|$ �e����L���D$����D$�fDL���p��������덐1�������E������H��v�����H��H���unknown error code at offset %s%s%-6dinternal errorinvalid repeat counts in {}pattern error? * + invalidunbalanced {}unbalanced []bad classbad escape sequenceempty expressionunbalanced ()bad range inside []expression too bigfailed to get memorybad back referencebad argumentmatch failed			

	collation error - not relevant;<���X������������������<zRx�$x����FJw�?:*3$"D����h\H����F�E�B �D(�D0�Y
(A BBBKI
(A BBBH\8L@]8D0A(A BBB���� �����E�O0�
AG\����|F�B�J �B(�A0�D8�J���K�K�A�-
8A0A(B BBBHGNU���� ^hw���������/DWd��:�	
,� � ���o(xh
U` ��@p	���o���o���o�o����oP �	�	�	

 
0
@
P
`
GA$3a1GA$3a1�	�	GA$3a1,4GA$3a1�
GA$3p864�,GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864GA$3a1,,GA$3a1,,GA$3a1�	�	GA$3a149libpcreposix.so.0.0.0-8.02-9.1.el8.x86_64.debug��K�7zXZ�ִF!t/��
�%]?�E�h=��ڊ�2N��
�)1�v�����J@�b�謖�9¯~��&z���D��2��'}�]��>D����H�01�u¡wx�ekM��@��K�h����*=�Ցߕw3�l�<�RK^�"�&�^�ʀ����٢���P���	�x��S)�OL��:'$^�A�T�6���9'm�c�J���q�9�$����:��z��Xbe���O����g=�H��!M"6��[Z2��`������}�J��DJ�E��-/g�#��PfS����{R��u����G~Y�>��Wdž�4��*����2���I^��Bg��%n�J{BN����}���V�u�=�!1y5Q�"�PӨ�ё�����އ#�M�������4�[hXz�`�-��K��9�p�>��㽵�Jפ��q�`ž _g�=�΅�Y��4g72�ϲ��q2�0���^���!e��Xԛ�;iX�8�K.�g^�Ƹh"e�~�t��؞o"�ݚ���_n��45�z)�~�,o/oc�a��s}��G(w]�1.n���M����"���p0�?�"�Ԙ�ne�j��[U4+k^�AL>"���Id1�)@1J�B�V�������4[i;�u���X	�<q^>ץ���G���x�#6?��ƟKݯ�+qn�exڨG]�;�*�d��<�j���8'�[�sƀ$��tAt-�4Us��VY٪FR��O��D�Q�vG�m��ӯ�Z�?�]L�@���q����b�t*Q�r��K���@��AP{���u�>�ɦ��H���l������P��g�YZ.shstrtab.note.gnu.build-id.gnu.hash.dynsym.dynstr.gnu.version.gnu.version_r.rela.dyn.rela.plt.init.plt.sec.text.fini.rodata.eh_frame_hdr.eh_frame.note.gnu.property.init_array.fini_array.data.rel.ro.dynamic.got.bss.gnu.build.attributes.gnu_debuglink.gnu_debugdata$���o((@(hh0xxU8���o��,E���o@T@@p^B���h�	�	c�	�	�np
p
�w},,
�@@� ���<�((`��� �� ��� ��� �� �P P�` `��  �� `���"4�"hP&"usr/lib64/libpcrecpp.so.0.0.0000075500000136420150403561430011427 0ustar00ELF>�7@��@8@�� h�h� h� �  ���� �� @@$$���  P�td(((44Q�tdR�tdh�h� h� ��GNUOU�*%���b�o3��tR�dC'
� %@0@�� h�BB�HQ%�\! " Ucd# �� �B��@�� 0� P|�C@H'(*+-./12346:<=>?BCGHIJNOPQSUVWXYZ^acdfhjlnprtvwxyz{���mG��d��i��i��Ў��FM�)��4K��;pn����<��Q��@���,S�4�A��r���(J���V����~i<.��O������)ŗ�ξ��D��Kt�aU�ʔ4�=�
r9K���i��݈c3�HZ�rD��.w(�x��@RFqLI�P�L��s���##�����j���)ʹC�k�]�Mre���4��e�C�����8�{�)�*Sˢ��ƹ'_U��ge���P�BE�쫋y��v��aY����-�BP���ɞj"�})HV?������|2���!�Gp>.����F��_��}�n��W���s�{*_��A�9��T	��l�H�1Y6��p�V��qX�H�	*���K��(�*�kF"�3�n�� ��-Kt,j�9��
z���� �
 , �t�TUP:
S�	0TspNi@ n��"�e!�T��S^pm1f
 W���Oa�	 T�PSApkD-@� ��s4��Q��S�
�Tk`p�a	T��L&#�lw��L&pw�@U��
�X�"�j���Sx"�e!D"0g���H��T�"�e��T�"�g���n���RU�
�T��Na��<"s�SpS�
�T�lw\0S��mQ�POi�"�uqM�SQ@<b��k���L�	�S�`S��S�`b��	T"
PT�@� ]�Y�|�<"�� q�k�R�P���m2 S�"ptq�	@T��?��U6J
`TH`o�0P���� �`L�r�m�E��=z�tg8p>"YpkD@�M��B�L�T�d|t
pT�P`,�S;@m0�@S�� 9�K���<7	�S__gmon_start___ITM_deregisterTMCloneTable_ITM_registerTMCloneTable__cxa_finalize_ZN7pcrecpp3Arg10parse_nullEPKciPv_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev_ZdlPv_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED1Evisspacememcpypcre_config__stack_chk_fail__assert_fail_Znwm_ZSt20__throw_length_errorPKc_ZN7pcrecpp2RE7CleanupEvpcre_free_ZdlPvm_ZN7pcrecpp2RED2Ev__gxx_personality_v0_ZN7pcrecpp2RED1Ev_ZNK7pcrecpp2RE8TryMatchERKNS_11StringPieceEiNS0_6AnchorEbPiipcre_exec_ZNK7pcrecpp2RE23NumberOfCapturingGroupsEvpcre_fullinfo_ZNK7pcrecpp2RE11DoMatchImplERKNS_11StringPieceENS0_6AnchorEPiPKPKNS_3ArgEiS5_i_ZNK7pcrecpp2RE9FullMatchERKNS_11StringPieceERKNS_3ArgES6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6__ZN7pcrecpp2RE6no_argE_ZNK7pcrecpp2RE12PartialMatchERKNS_11StringPieceERKNS_3ArgES6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6__ZNK7pcrecpp2RE7ConsumeEPNS_11StringPieceERKNS_3ArgES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5__ZNK7pcrecpp2RE14FindAndConsumeEPNS_11StringPieceERKNS_3ArgES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5__ZNK7pcrecpp2RE7DoMatchERKNS_11StringPieceENS0_6AnchorEPiPKPKNS_3ArgEi_Znam_ZdaPv_ZN7pcrecpp3Arg17parse_stringpieceEPKciPv_ZN7pcrecpp3Arg10parse_charEPKciPv_ZN7pcrecpp3Arg11parse_ucharEPKciPv_ZN7pcrecpp3Arg16parse_long_radixEPKciPvi__errno_locationstrtol_ZN7pcrecpp3Arg17parse_ulong_radixEPKciPvistrtoul_ZN7pcrecpp3Arg17parse_short_radixEPKciPvi_ZN7pcrecpp3Arg18parse_ushort_radixEPKciPvi_ZN7pcrecpp3Arg15parse_int_radixEPKciPvi_ZN7pcrecpp3Arg16parse_uint_radixEPKciPvi_ZN7pcrecpp3Arg20parse_longlong_radixEPKciPvistrtoq_ZN7pcrecpp3Arg21parse_ulonglong_radixEPKciPvistrtouq_ZN7pcrecpp3Arg12parse_doubleEPKciPv__memcpy_chkstrtod_ZN7pcrecpp3Arg11parse_floatEPKciPv_ZN7pcrecpp3Arg11parse_shortEPKciPv_ZN7pcrecpp3Arg15parse_short_hexEPKciPv_ZN7pcrecpp3Arg17parse_short_octalEPKciPv_ZN7pcrecpp3Arg18parse_short_cradixEPKciPv_ZN7pcrecpp3Arg12parse_ushortEPKciPv_ZN7pcrecpp3Arg16parse_ushort_hexEPKciPv_ZN7pcrecpp3Arg18pa
rse_ushort_octalEPKciPv_ZN7pcrecpp3Arg19parse_ushort_cradixEPKciPv_ZN7pcrecpp3Arg9parse_intEPKciPv_ZN7pcrecpp3Arg13parse_int_hexEPKciPv_ZN7pcrecpp3Arg15parse_int_octalEPKciPv_ZN7pcrecpp3Arg16parse_int_cradixEPKciPv_ZN7pcrecpp3Arg10parse_uintEPKciPv_ZN7pcrecpp3Arg14parse_uint_hexEPKciPv_ZN7pcrecpp3Arg16parse_uint_octalEPKciPv_ZN7pcrecpp3Arg17parse_uint_cradixEPKciPv_ZN7pcrecpp3Arg10parse_longEPKciPv_ZN7pcrecpp3Arg14parse_long_hexEPKciPv_ZN7pcrecpp3Arg16parse_long_octalEPKciPv_ZN7pcrecpp3Arg17parse_long_cradixEPKciPv_ZN7pcrecpp3Arg11parse_ulongEPKciPv_ZN7pcrecpp3Arg15parse_ulong_hexEPKciPv_ZN7pcrecpp3Arg17parse_ulong_octalEPKciPv_ZN7pcrecpp3Arg18parse_ulong_cradixEPKciPv_ZN7pcrecpp3Arg14parse_longlongEPKciPv_ZN7pcrecpp3Arg18parse_longlong_hexEPKciPv_ZN7pcrecpp3Arg20parse_longlong_octalEPKciPv_ZN7pcrecpp3Arg21parse_longlong_cradixEPKciPv_ZN7pcrecpp3Arg15parse_ulonglongEPKciPv_ZN7pcrecpp3Arg19parse_ulonglong_hexEPKciPv_ZN7pcrecpp3Arg21parse_ulonglong_octalEPKciPv_ZN7pcrecpp3Arg22parse_ulonglong_cradixEPKciPv_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_mutateEmmPKcm_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcm_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcmmemmove_ZN7pcrecpp3Arg12parse_stringEPKciPv_ZNK7pcrecpp2RE7RewriteEPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKNS_11StringPieceESA_Pii_ZNK7pcrecpp2RE7ReplaceERKNS_11StringPieceEPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE_ZSt24__throw_out_of_range_fmtPKcz_Unwind_Resume_ZNK7pcrecpp2RE7ExtractERKNS_11StringPieceES3_PNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE_ZNK7pcrecpp2RE13GlobalReplaceERKNS_11StringPieceEPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEprintfabort_ZN7pcrecpp2RE9QuoteMetaB5cxx11ERKNS_11StringPieceE_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag_ZSt19__throw_logic_errorPKc_ZN7pcrecpp2RE7CompileENS0_6AnchorEpcre_compilestrlen_ZN7pcrecpp2RE4InitERKNSt7__cxx111
2basic_stringIcSt11char_traitsIcESaIcEEEPKNS_10RE_OptionsE__cxa_atexit_ZN7pcrecpp6no_argE_ZN7pcrecpp7ScannerC2Ev_ZN7pcrecpp7ScannerC1Ev_ZN7pcrecpp7ScannerC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE_ZN7pcrecpp7ScannerC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE_ZN7pcrecpp7ScannerD2Ev_ZN7pcrecpp7ScannerD1Ev_ZN7pcrecpp7Scanner11DisableSkipEv_ZNK7pcrecpp7Scanner10LineNumberEv_ZNK7pcrecpp7Scanner6OffsetEv_ZNK7pcrecpp7Scanner9LookingAtERKNS_2REE_ZNSt6vectorIN7pcrecpp11StringPieceESaIS1_EE17_M_realloc_insertIJRKS1_EEEvN9__gnu_cxx17__normal_iteratorIPS1_S3_EEDpOT__ZN7pcrecpp7Scanner11GetCommentsEiiPSt6vectorINS_11StringPieceESaIS2_EE_ZN7pcrecpp7Scanner15GetNextCommentsEPSt6vectorINS_11StringPieceESaIS2_EE_ZNSt6vectorIN7pcrecpp11StringPieceESaIS1_EE17_M_realloc_insertIJS1_EEEvN9__gnu_cxx17__normal_iteratorIPS1_S3_EEDpOT__ZN7pcrecpp7Scanner11ConsumeSkipEv_ZN7pcrecpp7Scanner17SetSkipExpressionEPKc_ZN7pcrecpp7Scanner4SkipEPKc_ZN7pcrecpp7Scanner10EnableSkipEv_ZN7pcrecpp7Scanner7ConsumeERKNS_2REERKNS_3ArgES6_S6__ZlsRSoRKN7pcrecpp11StringPieceE_ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l_ZNSt8ios_base4InitC1Ev_ZNSt8ios_base4InitD1Evlibpcre.so.0libstdc++.so.6libm.so.6libc.so.6libgcc_s.so.1_edata__bss_start_endlibpcrecpp.so.0GCC_3.0GLIBC_2.3.4GLIBC_2.4GLIBC_2.14GLIBC_2.2.5CXXABI_1.3GLIBCXX_3.4.20GLIBCXX_3.4.9CXXABI_1.3.9GLIBCXX_3.4/opt/alt/pcre802/usr/lib64	
� P&y�Pti	
ii
$���.ui	9�ӯk	Ep��P�)_yѯmt)�zh� @:p� �8x� P9�� :�� �� �� �� (ȟ cП ؟ -� "� $� %�� &� � � �� � � � P� + � (� h0� C8� @� H� ZP� 	X� 
`� `h� p� \x� �� O�� 
�� K�� �� k�� �� �� �� Ȟ О ؞ � � � ��� � I� � u�  � W(� �0� 8� @� }H� 8P� X� p`�  h� qp� !x� #�� A�� 2�� z�� x�� F�� X�� v��H��H��o H��t��H����5bm �%cm ��h�������h��������h�������h�������h�������h�������h�������h��q������h��a������h	��Q������h
��A������h��1������h��!������h
��������h��������h������h�������h��������h�������h�������h�������h�������h�������h��q������h��a������h��Q������h��A������h��1������h��!������h��������h��������h������h �������h!��������h"�������h#�������h$�������h%�������h&�������h'��q������h(��a������h)��Q������h*��A������h+��1������h,��!������h-��������h.��������h/������h0�������h1��������h2�������h3�������h4�������h5�������h6�������h7��q������h8��a������h9��Q�������%�i D���%�i D���%�i D���%�i D���%�i D���%�i D���%�i D���%�i D���%}i D���%ui D���%mi D���%ei D���%]i D���%Ui D���%Mi D���%Ei D���%=i D���%5i D���%-i D���%%i D���%i D���%i D���%
i D���%i D���%�h D���%�h D���%�h D���%�h D���%�h D���%�h D���%�h D���%�h D���%�h D���%�h D���%�h D���%�h D���%�h D���%�h D���%�h D���%�h D���%}h D���%uh D���%mh D���%eh D���%]h D���%Uh D���%Mh D���%Eh D���%=h D���%5h D���%-h D���%%h D���%h D���%h D���%
h D���%h D���%�g D���%�g DH�|$ I��L9�t���H���e����H������H�����H��H9�t����H���@���H�;H9|$t���H���)����L�� ����H������H�|$I��L9�t���H�����H�|$ I��L9�t�e���H�;I9�t�X���H�߾H�{���H�������H�|$ I��L9�t�/���H�;I9�t�"���H�߾H�E���H�����H�<$H��H9�t���H���s�����H��H��f H��f H�wg H�=�f �qg H�PH�~b H�H�Xg H�p�H�=g �x���H�g �g H���f.�@��H��H�=!g �\���H�=�f H��H�b H�5g ����f.�@H�=qf H�jf H9�tH�6f H��t	�����H�=Af H�5:f H)�H��H��H��?H�H�tH�f H��t��fD�����=f u+UH�=�e H��tH�=fa ����d�����e ]������w������H�����DAUATI��UH��S��H����~�>�����H��>��uLc�C�,�Ѓ�0��	wH��>�� ~)H��[]A\A]�@��ߍH�L��w�H��>�� �L��L��H�����B�D-H��[]A\A]�Df.�H��dH�%(H�D$1���%ptH�T$dH3%(u}H���fDH�t$�����D$��
t0��
t=

t4���t?���uI�P���f�� �f��0�f��@�s����a���H�
B�wH�5�<H�=�=�R���f�H�H��xTH9�v#H�H9�sH��x&H�7H�~����f.�H�x�����H��������H�H�����PH�=w<����fD��SH��H�0H��t	H�xc �H�{8H��t	H�fc �H�[@H��c H9�t$H��tH�;H�CH9�t�.���H�߾ [�P���[�@f.���SH��H������H�{�H9�t[��[�@f.���SH��@L�W8dH�%(H�D$81���LDW0�\$PM����HcG A��H�$H�D$H�D$H�D$H�D$ H�D$(��~
H�$H�D$HcG$��~
H�$H�D$(1��ɋNL�������€�E��E��D�H�H�5�;H��HD�H��SAQA������ZY��xu������	�1�H�\$8dH3%(uH��@[��+����f.���H��H�8dH�%(H�D$1�H��t.1�H�L$��.�����u%�D$H�T$dH3%(uH��ø����������H�
N<��H�5B:H�=^:���fD��AWAVI��C�LIAUATUSH���D$HL�d$@9���H����M��1�PD��A�M��I��H���r���ZY����t_A�D$A���tjM��teL������9�B��~U�E�I��I�l���I��I��L9�t3Ic|$�I�EA�4$H�)�H;�P��u�H��1�[]A\A]A^A_�fDH���[]A\A]A^A_�H�
<�7H�599H�=}9���H�
�;�5H�59H�=G9���@f.���AWAVAUATUSH��H��$L��$�L��$�H��$�H�$H��$H��$�L��$�H�D$H��$ L��$�L��$H�D$H��$(L��$H�D$dH�%(H��$�1�H��_ H9��eH�T$0H9��_H�L$8I9��YL�D$@I9���L�L$HI9��ML�T$PI9��OL�\$XH9��QH�\$`H9��sH�l$hI9��EL�d$pI9��gL�l$xI9��iL��$�I9��hH�$L��$�H9��cH��$�H�\$H9��]H��$�H�\$H9��WH��$�H�\$H9���H��$�A���A�f.�H�L$,j3�H��$�PL�D$@����ZYH��$�dH3%(��H�Ę[]A\A]A^A_�fDE1��A��A���A���A��u���DA��e���DA��U���A��J���f.�A��5���DA�	�%���DA�
����DA�����DA����DA�
���DA���������AWAVAUATUSH��H��$L��$�L��$�H��$�H�$H��$H��$�L��$�H�D$H��$ L��$�L��$H�D$H��$(L��$H�D$dH�%(H��$�1�H��\ H9��eH�T$0H9��_H�L$8I9��YL�D$@I9���L�L$HI9��ML�T$PI9��OL�\$XH9��QH�\$`H9��sH�l$hI9��EL�d$pI9��gL�l$xI9��iL��$�I9��hH�$L��$�H9��cH��$�H�\$H9��]H��$�H�\$H9��WH��$�H�\$H9���H��$�A���A�f.�H�L$,j31�H��$�PL�D$@��ZYH��$�dH3%(��H�Ę[]A\A]A^A_�f�E1��A��A���A���A��u���DA��e���DA��U���A��J���f.�A��5���DA�	�%���DA�
����DA�����DA����DA�
���DA���������AWAVAUATUSH��H��H��$H��$�L��$�L��$�H�$H��$H��$�L��$�H�D$H��$ L��$�L��$H�D$H��$(L��$H�D$dH�%(H��$�1�H��Y H9��rH�T$0H9��lH�L$8I9��fL�D$@I9���L�L$HH9��ZH�t$PI9��\L�T$XI9��^L�\$`H9���H�l$hI9��RL�d$pI9��tL�l$xI9��vL��$�I9��uH�4$L��$�H9��pH��$�H�t$H9��jH��$�H�t$H9��dH��$�H�t$H9���H��$�A��fDA�f�H�L$,j3�H��H��$�PL�D$@��ZY��tHcL$,H)KH��$�dH3%(��H�Ę[]A\A]A^A_�@E1��A��A���A��u���DA��e���DA��U���DA��E���A��:���f.�A��%���DA�	����DA�
����DA����DA����DA�
����DA���������AWAVAUATUSH��H��H��$H��$�L��$�L��$�H�$H��$H��$�L��$�H�D$H��$ L��$�L��$H�D$H��$(L��$H�D$dH�%(H��$�1�H��V H9��rH�T$0H9��lH�L$8I9��fL�D$@I9���L�L$HH9��ZH�t$PI9��\L�T$XI9��^L�\$`H9���H�l$hI9��RL�d$pI9��tL�l$xI9��vL��$�I9��uH�4$L��$�H9��pH��$�H�t$H9��jH��$�H�t$H9��dH��$�H�t$H9���H��$�A��fDA�f�H�L$,j31�H��H��$�PL�D$@�!�ZY��tHcL$,H)KH��$�dH3%(��H�Ę[]A\A]A^A_��E1��A��A���A��u���DA��e���DA��U���DA��E���A��:���f.�A��%���DA�	����DA�
����DA����DA����DA�
����DA���������AWAVAUATUSH��xdH�%(H�D$h1�E����C�\IH��I��A��I�΃�7SH�D$P����XZH�T$hdH3%(��uwH��x[]A\A]A^A_�fDHc�D�L$H��L�$��SL��L��PD�L$I��D��L�D$H���S�L�����	�Y^�H�
�.�[H�5R,H�=�,��������H��tH�:�r�Ðf.���1���uH��t����D��f.���1���uH��t����D��f.���AWAVAUATE1�USHc�H��HdH�%(H�D$81���u$H�L$8dH3%(D��u|H��H[]A\A]A^A_�f�H�D$H��I�։�H��A����H�����H�t$D��H���I��H����H9\$u�A���u�M��tI�A��fDA��q����p���AVAUATUSHc�H��@dH�%(H�D$81���u)H�L$8dH3%(��H��@[]A\A]A^��H�D$H��I�ԉ�H��A���X��8-H��t(��H�t$D��H���I��H��_�H9\$t1��@A���u�M��tI�$��m���fD��]��������SH��H��dH�%(H�D$1�H������tH�$H���H����w$H��tf�H�\$dH3%(uH��[�fD1����7��fD��SH��H��dH�%(H�D$1�H���l���tH�$H����w#H��tf�H�\$dH3%(uH��[�D1�������@f.���SH��H��dH�%(H�D$1�H������tH�$�������H�H9�w"H��t�H�\$dH3%(uH��[�D1����W��fD��SH��H��dH�%(H�D$1�H������tH�$�����H9�w"H��t�H�\$dH3%(uH��[�D1�������@f.���AWAVAUATE1�USHc�H��HdH�%(H�D$81���u$H�L$8dH3%(D��u|H��H[]A\A]A^A_�f�H�D$H��I�։�H��A�����H����H�t$D��H���I��H��$�H9\$u�A���u�M��tI�A��fDA��q���� ���AVAUATUSHc�H��@dH�%(H�D$81���u)H�L$8dH3%(��H��@[]A\A]A^��H�D$H��I�ԉ�H��A�����8-H��t(���H�t$D��H���I��H��/�H9\$t1��@A���u�M��tI�$��m���fD��]����Q����AVAUATUSH���dH�%(H��$�1�����������t01�H��$�dH3%(����H���[]A\A]A^�DHc�L�d$H����L��I��H������D,���H�t$L���I����I�4,H9t$u�A���u�M��t�AE��m�����`����q����SH��H��dH�%(H�D$1�H������tH��t
f��Z$�H�L$dH3%(uH��[����f.����
�r�f�����b�f�����R�f���1��E��@���
��f�������f�������f���1�����@���
��f�������f�������f���1�����@���
��f������f�����r�f���1��e��@���
�"�f������f������f���1����@���
�"�f������f������f���1����@���
��f������f�����r�f���1��e��@���
����f��������f�������f���1�����@��H��t'H��H��H�RH��Lc�H��1��L��H���f���f.���AWAVAUATM��UH��SH��H�D�L$LcJH�L$N�4I9�w5�x@�S�BЃ�	w|;D$�r�H�Ic4�����H��I9�vE�L�{<\t�H�]H�UH�ML�kH9���H�MI9�w|�H�EL�m�DL��I9�w�H���[]A\A]A^A_Ã�\��L�mH�UH�MM�}H9���H�MI9���B�*\H�UH��L�}B�D*�W���D1�A�1�H��H��D$����H�U�D$�]���f�A�T�H�D$)�H0H��������H+EHc�H9�wlH��������fD��
���fD1�A�1�L��H���s��H�U�I���f.���,���fDH��1�[]A\A]A^A_�H�=�!�����f���AW1�A�AVAUI��ATU1�SH��H��(H�t$L�|$@L�t$H��M��L��dH�%(H��$ 1�H�H�D$H�B1҉D$ j3���ZY��u0H��$dH3%(����H��([]A\A]A^A_�DL�d$ A��M��L��I�T$L��L���D$0H�T$ H�H�D$(H�T$H�S�T$H�T$�J���ń�t@�D$@��xY�T$D��xpH�{)�Hc�L�D$(Hc�H�L$ H��H)�H9�HG�H9�wgH���$��H�|$ I��L9��2����]���(������H�
�%�VH�5  H�=� ���H�
�%�WH�5 H�=z �u��H��H��H�=!1�H�5k ������H������@f.���AVA�I��1�AUI��ATUH��1�SH��H��H���dH�%(H��$�1�I��H��j3M�����A��X1�ZE��t"I�M��H��L��I�FL��H�������H��$�dH3%(uH���[]A\A]A^����f.���UH��AWAVAUATSH��XH������H������dH�%(H�E�1�H�����ƅ��H������H��H�����H�BHDž������gH�����I��1�H������E1�H������Dž����H��������H�������L��L������j3H�������D��L������E1�����^A��_D���dD���D���A9��E9��TH������D��Hc�)�H�OHc�H��H)�H9�HG�H�H9���H������H����H������L������E��L��H������H������H�H������H������H�C������4��H�K������E9�����D9��hL������M�E�����D��H��L������1�L��j3H������L������A�����������ZA��Y���������������H�����H������H��H9�t���H�M�dH3%(�������nH�e�[A\A]A^A_]�I�MA�\$��9���Mc�I9���H������H�0B�<.
�<A�G)uCD9���I���"�������A�(=0�0H������H�H��A�\$��tx9�}tHc�H9��SH������H�6�I��%�=���H��������Hc�I���&H9���<H�����������9��L��������D9�9������H������H�0�6���f�H������Mc�H�0��H��D)�L)�Hc�H9�HG�I9��H������L��f��H������H�@��9��O���H������L�����f�Hc�H9��a�<
A�(��������������DL��������A9�������f���f�H������H�8H������H������H�����L�����H��H��H9���L����H9���H������H�����H�H�FH����H������L�HH������H�����L�@�~���H������H�HH�89��o���Hc�H��H)�H9���H�4H�������3��H������H�8H�H�:������A�(=@������w��H������A�(H�I��=P��������fDH9�tYH������fo���H�����H�BH�2JH�����,���H������H�����H���oVH�)�����)������M��t,fo���H��tRH������H���ofF)�������H��H������H������H�@�@�U���Dž�����F���H������@L�@�2���H��H�=81�H�5f���H�
;��H�5�H�=��c��L�~H�
51��+H�5'H�=H�[�����fDH�
���H�5�H�=\���H��H�5�H�=�1����L��H�5�H�=�1��q��������H�������AWH�GAVAUATUSH��H��H�H�D$�G�FH�G����I��1�E1��dfDD��H�M�u��߃�A<vA�D$�<	vE��x
A��_��H9T$��H�CI9�wxF�$*H�L�sB�0H��A9o~FL�kI�D�$(E��u�H��������L)�H��� �H�5WH���b��H��A9o�H��H��[]A\A]A^A_�fDA�1�1�L��H������H��k�����Y���fDH9T$��H�CI9�wlB�*\H�L�sB�0I�L�cD�4(H�M�l$H9D$t`H�SI9�wF�4 H�L�kB�D �	����A�1�1�L��H���[��H���fDA�1�1�L��H���;��H��w�����f���U���H�=������H�����@f.���AUATUSH��H��H�o(dH�%(H�D$81���tGH�?��H�L$H�T$E1��p��H��H����H�L$8dH3%(H���H��H[]A\A]�f�L�d$H��H�r�I�D$L��H�D$���H�SH�3L�����H��������H+D$H�����H�5^L���b��H�|$H�L$E1���H�T$����H�|$I��H��L9��G����_��H���B���fDH��< H9C@�+���� �E��L�l$H�����I��H�@I�$M��t
L���!��I�TL��L������L�c@����C��H�=+�����H���3����H���<���@f.���AWAVAUATI��USH��H��dH�%(H�D$1�H9�t7L�/L�I��H�nM9��H�wH9���H����H�kA�D-M����I�$H�C A�D$�C(H��; H�C01�H��H�C@H�C8�n��H�C8H��t�H���X��H�C0H�D$dH3%(��H��[]A\A]A^A_ÐI�6H��tL��H�����L�+�^���@H��H�,$����H�;I��I9�t���H�$L�+H�CH���*����fDH�; H�C �; �C(�,��������fD�A�EL�+������@��H�H�GH9�tH���(����f.�D��AWAVM��AUATL�gUH��SH��H��8H�wdH�%(H�D$(1�H�DI��H�D$I)�L��H)�H�H�D$ L;'�H�wH�|$ H�L$���H��L�H�L$I��t,H����L��H��H��H�L$L�D$�1��H�L$L�D$H��t(M��t#I�</I����L��H��L�D$���L�D$M��t(H�t$J�|5L�L�I��t_L��L�D$����L�D$M9�tL�����H�D$ L�;H�CH�D$(dH3%(uNH��8[]A\A]A^A_�DA�A��]���@���f����fD���\����������USH��L�CH��H�GH�?H�,L9�tOL�CL9�w.H��tH�H��tH���H�;H�kH���/H��[]�fDI��H��H��1�H�����H�;��A�����H�;�fD��H��������AWAVAUATUH��SH�H��(H�WH)�I9���M��H�H��H�I)�N�$2H9���H�{L9��dL�<0I��H�I)�A��L9�@��A!�H9�vtE��t.I�4/K�<I���ML��L�D$H�L$���L�D$H�L$M��tI���L��H��L������H�L�cB� H��(H��[]A\A]A^A_�fDH�H9�w�M���L9���I����L��H��L��L�L$L�D$H�L$���E��H�L$L�D$L�L$t�I�4/K�<I����L��L�D$H�L$����L�D$H�L$L9��M���L�J�H9���H9���J�41I���L��L���������f.�H��H����������A���D������fD��Y���fD�A�E���g����4���@I��t�L��H��L������������4���fDH)�H��tDH��tH��H��L��L�D$����L�D$L��K�4I�</H)�H��tH���A����0���7����A������%����A�����H�=��r��f���ATI��UH��SH��dH�%(H�D$1�H��t	H����L)�H��H�$H��wPH�EH��u6A�$�H�$H�EH�]�H�D$dH3%(uYH��[]A\�fDH��t��f�H��1����H�$H�EH�UH��L��H���L��H�$H�E�H�=#�������H�?H��x	H���o��PH�=B
���f���H�GH�GH�H�G 1��G�G(H�G0f�G8�G:H�G@�GH�f�f.���ATUSH��H��dH�%(H�D$1�H�GH�L�&H�nL��H�t	M����H�,$H��wxH��ubA�$�SH�k�(H�H�C0H�C H�C�C:�C(1�f�C8H�C@�CHH�D$dH3%(u\H��[]A\�f.�H��t��f�H������H�$H�H�SH��L��H������H�,$H��i���H�=�
����R��f���USH��H��H�o0H��tH������HH�����H�k@H��tH�}H��t����H������H�;H��H9�tH��[]���f.�H��[]���H�0t�G8�PH�
��fH�50H�=9�����H�H�w �H9�s1ɀ:
��H���H9�u��@Ð@f.���H�G H+�@��H��H��E1�E1�dH�%(H�T$1�H�w �H�L$H�����H�T$dH3%(uH�������@f.���L�G@M����AVAUATUSI�I;Xt\I��I��Hc�Lc��
H��I9XtDI�$H�H�4(H9�r�HcsL�H��H�H9�w�I�vI;vt*H�H��H���N�I�vI9Xu�[]A\A]A^�f�H��L������M�D$@�fDÐfD��H�G@H��t{ATUSHc_HH��HH;Xt]H��I���+fDH��SH��H��H�N�V�H�uA�D$HH9Xt*H�uH;uu�H��H��H���R��I�D$@A�D$HH9Xu�[]A\�@ÐfD��AUATL�g UH��SH��(L�o H�H0 dH�%(H�D$1��fD�}9t,SH�}0I��I��SH��H��L��SSSSSSSSSS���H��`��u΀}:tH�}@tOH�E L)�H�D$dH3%(ueH��([]A\A]�H�}@L�,$�D$H�wH;wt7L�.H���F�H�w뼿���H�H�@H�@H�E@�H���K������@��AWAVAUI��ATUH��SH��XH�_0dH�%(H�D$H1�H��tH�������HH���J��M�����HL�d$ ���I�T$L��L�x�@H��L�8H�@H�@ �@(H�T$ H�T$����H�T$H��I��H�D$��H����A�M�L$0H�D$(L��H���1����H�|$ I��L9�t�s���H�]0H��f�U8����@1�H�E0f�E8H�D$HdH3%(ukH��X[]A\A]A^A_�H�|$���H�L$H�D$ H�L$0L��L��H��H�L$���H�L$H�T$ H���N���f.�H���;���H��H��������H���Y����H���:��f���AWAVAUI��ATUH��SH��XH�_0dH�%(H�D$H1�H��tH���'���HH�����M�����HL�d$ �b��I�T$L��L�x�@H��L�8H�@H�@ �@(H�T$ H�T$�&��H�T$H��I��H�D$��H����A�M�L$0H�D$(L��H���1�����H�|$ I��L9�t�����H�]0H��f�U8�����@1�H�E0f�E8H�D$HdH3%(ukH��X[]A\A]A^A_�H�|$���H�L$H�D$ H�L$0L��L��H��H�L$���H�L$H�T$ H���N���f.�H���;���H��H����j����H��������H�����f���H�0t	�G8�,��PH�
�
�kH�5�
H�=�
�,���f�f.���SH��H��H�s H��L�
�+ AQAQAQAQAQAQAQAQAQAQAQAQ�9��H��`��t�{8uH��[�DH�߈D$����D$H��[�f���AWH��AVAUATI��USH��H��H�oL�/H��L)�L)�H���+H�4I���H9���L��H�T$H�$����H�$H�T$I��I�H�@�rH�L�H��qL9�t;L��L��f�L�
D�BH��H��L�I�D�A�H9�u�H�C�L)�H���I�D H9�t;H��H��f�D�BL�
H��H��L�I�D�A�H9�u�H)�H�U�H���H�DM��tL��H�$�C���H�$M�4$M�|$I�D$H��[]A\A]A^A_�f�H��������H9�����H��u�E1�E1�������H��I������f.�D��AWH��AVAUATI��USH��H��H�oL�/H��L)�L)�H���+H�4I���H9���L��H�T$H�$膿��H�$H�T$I��I�H�@�rH�L�H��qL9�t;L��L��f�L�
D�BH��H��L�I�D�A�H9�u�H�C�L)�H���I�D H9�t;H��H��f�D�BL�
H��H��L�I�D�A�H9�u�H)�H�U�H���H�DM��tL��H�$�þ��H�$M�4$M�|$I�D$H��[]A\A]A^A_�f�H��������H9�����H��u�E1�E1�������H��I������f.�D��AUI��ATUSH��8L�&Hc^dH�%(H�D$(1�H��L��H�UH��H�H�$t	M����H��wsH��u]A�$�D$H�EH�\$L���H�T$H�4$�6���H�<$H��H��H9�t���H�L$(dH3%(H��uWH��8[]A\A]�H�EH��t��D��x:H�{蓽��H�\$H�$H��L��H���,���H�$�s���H�=�׼��袽��H�=6�v�����H���-�����H��H���pcrecpp.ccbasic_string::_M_createpcre_retval == 0(1 + n) * 3 <= vecsizematches >= 0n >= 0basic_string::appendvec[0] >= 0vec[1] >= 0basic_string::replace__pos <= size()matchstart >= startmatchend >= matchstart\0(?:)\zNULL == "Unexpected return value from pcre_config(NEWLINE)"%s: __pos (which is %zu) > this->size() (which is %zu)/usr/include/c++/8/bits/basic_string.h%s:%d: %s: Assertion '%s' failed.
basic_string::_M_replacebasic_string::_M_construct null not validint pcrecpp::RE::NumberOfCapturingGroups() constbool pcrecpp::RE::DoMatch(const pcrecpp::StringPiece&, pcrecpp::RE::Anchor, int*, const pcrecpp::Arg* const*, int) constbool pcrecpp::RE::DoMatchImpl(const pcrecpp::StringPiece&, pcrecpp::RE::Anchor, int*, const pcrecpp::Arg* const*, int, int*, int) conststd::__cxx11::basic_string<_CharT, _Traits, _Alloc>::reference std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::operator[](std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::size_type) [with _CharT = char; _Traits = std::char_traits<char>; _Alloc = std::allocator<char>; std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::reference = char&; std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::size_type = long unsigned int]int pcrecpp::NewlineMode(int)int pcrecpp::RE::GlobalReplace(const pcrecpp::StringPiece&, std::__cxx11::string*) constbool pcrecpp::RE::Replace(const pcrecpp::StringPiece&, std::__cxx11::string*) constpcre_scanner.ccskip_ != NULLvoid pcrecpp::Scanner::EnableSkip()void pcrecpp::Scanner::DisableSkip();4eH���P����x�����
�����ظ��\� ���tV�������������t(����(����8����ػ������ ���8����x�����Ⱦ���H����x���Xh���X��X��xX���8��DX��X���l����x���H�����4(��X���|����������0	x��t	����	����	����	���	���	(���	8��
H��$
X��8
h��L
x��`
���t
����
����
����
����
����
����
�������((��<8��PH��dX��xh���x��������������������������������\
���h��X(��8����(X�����d���X�t(��H��������� H�4��H��\�����8�8��������H���x���<H���`zRx�$��FJw�?:*3$"Dx����\����p��!H�x����B�B�D �D(�F0{
(A ABBEr(A ABB�̷���D m
G�����i]ظ��bE�V
EAzPLRx�m � $���"sE�V
EA,h���E�DP�XB`IXAPn
AA��zH F
Al�X���"F�B�J �B(�A0�A8�D@UHHPXHA@m
8C0A(B BBBGD
8F0A(B BBBA\$����F�B�B �B(�A0�A8�G���N�K�A�^
8A0A(B BBBG\������F�B�B �B(�A0�A8�G���K�K�A�^
8A0A(B BBBJ\�8���F�B�B �B(�A0�A8�J���Q�K�A�m
8A0A(B BBBE\D���F�B�B �B(�A0�A8�J���N�K�A�m
8A0A(B BBBHh�x���F�B�B �B(�A0�A8�D�p�F�H�A�V
8A0A(B BBBGV�G�c�A����$���&8��&HL0���F�B�B �B(�D0�A8�G�k
8A0A(B BBBC@�����F�B�B �A(�A0�Gpl
0A(A BBBI �0��iE�G L
AG |��aE�G E
AF $���iE�G M
AF H��aE�G E
AFHl`���F�B�B �B(�D0�A8�G�k
8A0A(B BBBC@�����F�B�B �A(�A0�Gpl
0A(A BBBI@�`���F�B�B �A(�A0�G�G
0A(A BBBF @���UE�G B
AAd8��x4���0���,���(���$��� ���������,��@��T��h��|��������������������������������0���D���X���l�����������������������H�h��F�B�E �B(�E0�D8�Gp
8A0A(B BBBF(0	����E�A�K ~
AAGH\	���P�B�B �B(�A0�D8�G`�
8D0A(B BBBG�	���6M`d�	���F�B�B �B(�D0�D8�DP�
8F0A(B BBBA
8C0A(B BBBA\	�����F�J�B �E(�A0�C8�J�S�m�F�A�d
8A0A(B BBBF,zPLRx�	 ��������4����P�
����F�M�E �A(�F0�M�Z�B�L�C�A
0A(A BBBA<
����E�C
P������.n.�.c
AL.,zPLRx�- �������4��$IL�
@��9F�F�B �B(�A0�A8�GP�
8D0A(B BBBG(zPLRx�� �P������0t����0@����F�D�D �D0c
 AABG<T�����F�B�A �A(�GpV
(A ABBC$zPLRx�� �p����,ب��0;H����|F�B�B �B(�D0�A8�GP�
8A0A(B BBBB@
,���rHi\
���Rp
���D0�
����F�A�A �G0�
 AABK0�
���wE�A�G R
AAODAA�
���0Q��18��(4��QH C
AHD��qF�E�B �B(�D0�A8�GP
8A0A(B BBBCD�,���S�B�B �A(�A0�f
(A BBBJX�����,�����O�A�A �oABE���H��qF�E�B �B(�D0�A8�GP
8A0A(B BBBCdT����F�B�E �D(�DPjXK`JhApAxA�A�A�A�A�A�A�IPp
(A ABBDP�@���cF�B�B �E(�A0�D8�D�
8A0A(B BBBD,zPLRx��
 ��������4����5�P8T����F�B�B �E(�A0�D8�D�
8A0A(B BBBD�n���5����4UP���gE�N I(B0B8B@BHBPBXB`BhBpBxB�I N
AFUA<��5F�E�A �A(�D`�
(A ABBD$zPLRx�% �`����,������@���2HW����e����������
�����������;W�K�������� ��Y�������0��Y�������0��
u��B��GNU�@:�8P9:�� ������H0
�xh� �� ���o(�H
�Н p�*p)h	���o���o�(���o�o�'���o�� �0�0�0�0�0�0�0�011 101@1P1`1p1�1�1�1�1�1�1�1�122 202@2P2`2p2�2�2�2�2�2�2�2�233 303@3P3`3p3�3�3�3�3�3�3�3�344GA$3a1�9�9GA$3a1H0^0GA$3a1�x�xGA$3a1�9I:
GA$3p864P:|eGA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA*FORTIFYGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA+GLIBCXX_ASSERTIONS
GA$3h864�9�9
GA$3h864�7�7GA*�e�eGA!stack_realignGA!stack_clashGA*cf_protectionGA*
GA*GOW�DGA!GA*�e/gGA!stack_realignGA!stack_clashGA*cf_protectionGA*
GA*GOW�DGA!GA*0g�gGA!stack_realignGA!stack_clashGA*cf_protectionGA*
GA*GOW�DGA!GA*�g~jGA!stack_realignGA!stack_clashGA*cf_protectionGA*
GA*GOW�DGA!GA*�jOkGA!stack_realignGA!stack_clashGA*cf_protectionGA*
GA*GOW�DGA!GA*�8B9GA!stack_realignGA!stack_clashGA*cf_protectionGA*
GA*GOW�DGA!
GA$3p864PkgtGA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA*FORTIFYGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA+GLIBCXX_ASSERTIONS
GA$3h864�9�9
GA$3h864H8H8GA*pt�uGA!stack_realignGA!stack_clashGA*cf_protectionGA*
GA*GOW�DGA!GA*�uawGA!stack_realignGA!stack_clashGA*cf_protectionGA*
GA*GOW�DGA!
GA$3p864pw�xGA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA*FORTIFYGA+GLIBCXX_ASSERTIONS
GA$3h864�9�9
GA$3h864�8�8GA*P9�9GA!stack_realignGA!stack_clashGA*cf_protectionGA*
GA*GOW�DGA!GA$3a1�x�xGA$3a1�x�xGA$3a1^0c0GA$3a1�x�xlibpcrecpp.so.0.0.0-8.02-9.1.el8.x86_64.debug��B�7zXZ�ִF!t/��w�]?�E�h=��ڊ�2N���^ �����n�|�굧Dv�4GO�+RPs��P�
 ����3 M�7�燦��CEC�8������vpP�:��V��fC4�%�#+�@g�@;F�ӵ�w&�_E�*!="�s
v2����+��Njl^�#y�UŐ�8!���a�5gz���U�ő�C�o4���:�Po������E_�'Pn2Y��CO��%�2���%vm�x|{pXcɮ�!وmD�4���N��l��f=������ͯe*6�7ְr�G�K9ո��}H�޻o�^�T�jVT�i��>�/����u���f�D�p���J��zc{��_��۩�7Gp;���O��X�g�	��?��߮K&�w`JHՃ�eB�I��颛}.Q�:ʞ~�Jʢ�Wu��#MZ9󩸂��q1�MЌ�J�z���Wm�S�]o�*Jn�z#���1i0s2�ɩ�}j=Pr�v�9��.\���	��@?�Ҟ��ܓ}OL�%����`��J�t�vH�b�J-���o(�U�_Q3Dz����S��$��
6}�O2A@��=�'�,����"�6Ջ_k�F�Q��t+M.�v�X
S�]��2����o�A�R�h�����ɱ�њm��Dc���W���˘�k;���x߼��D�H��4���A�pN���m��G�X����`�O�R������]D�l@�7?o�eV��Y[��a�ED��`��V8NP +�4�a�4+�Ǝ��� �%�o�o_c���� �/ �O��K��('���N���1u�����v(��~n��1$�1�f��ߴ	h[��-
�wJ�-=ۨ�O���������aA����hS\�w�V�_�wRU�2� y;@��(��?Y�KmM*���g�8ϳ��[+�2
s�5\gL=�/�<���9w;O��v��1�hԱ�zh��[�£��0����dԪtY��p�;-�y$W0�1*��!U�Sg#V��H��Ǣ�l���TȄ&�1(�~�o�#��[?�����"CԳM�4~�?�����
[��B�Z��*a�M��E��a\M_���TI!���
�����.&3��ˬ�]?�=���3l���
.Di�X�2��!*�@S�'�x�����Gd�\hN� �7?���}�	�*�'1/��g�YZ.shstrtab.note.gnu.build-id.gnu.hash.dynsym.dynstr.gnu.version.gnu.version_r.rela.dyn.rela.plt.init.plt.sec.text.fini.rodata.eh_frame_hdr.eh_frame.gcc_except_table.note.gnu.property.init_array.fini_array.data.rel.ro.dynamic.got.data.bss.gnu.build.attributes.gnu_debuglink.gnu_debugdata$���o(((HH�0���8���o�'�'E���o�(�(�Tp)p)h^B�*�*phH0H0cp0p0�n 4 4�w�7�7�@}�x�x
��x�x� �((4�`�`���$�$����� �h� h���� ����� ����� ��@�Н Н0�� � � �h ��`�08�4+l��T�:usr/lib64/libpcrecpp.a000064400000226626150403561430010601 0ustar00!<arch>
/               1575493209  0     0     0       5174      `
`��������������������������������������������������������������������������̶̶̶̶̶̶̶̶̶̶̶̶̶̶̶̶̶̶̶̶��_ZN7pcrecpp3Arg10parse_nullEPKciPv_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED1Ev_ZN7pcrecpp2RE7CleanupEv_ZN7pcrecpp2RED2EvDW.ref.__gxx_personality_v0_ZN7pcrecpp2RED1Ev_ZNK7pcrecpp2RE8TryMatchERKNS_11StringPieceEiNS0_6AnchorEbPii_ZNK7pcrecpp2RE23NumberOfCapturingGroupsEv_ZNK7pcrecpp2RE11DoMatchImplERKNS_11StringPieceENS0_6AnchorEPiPKPKNS_3ArgEiS5_i_ZNK7pcrecpp2RE9FullMatchERKNS_11StringPieceERKNS_3ArgES6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6__ZN7pcrecpp2RE6no_argE_ZNK7pcrecpp2RE12PartialMatchERKNS_11StringPieceERKNS_3ArgES6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6__ZNK7pcrecpp2RE7ConsumeEPNS_11StringPieceERKNS_3ArgES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5__ZNK7pcrecpp2RE14FindAndConsumeEPNS_11StringPieceERKNS_3ArgES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5__ZNK7pcrecpp2RE7DoMatchERKNS_11StringPieceENS0_6AnchorEPiPKPKNS_3ArgEi_ZN7pcrecpp3Arg17parse_stringpieceEPKciPv_ZN7pcrecpp3Arg10parse_charEPKciPv_ZN7pcrecpp3Arg11parse_ucharEPKciPv_ZN7pcrecpp3Arg16parse_long_radixEPKciPvi_ZN7pcrecpp3Arg17parse_ulong_radixEPKciPvi_ZN7pcrecpp3Arg17parse_short_radixEPKciPvi_ZN7pcrecpp3Arg18parse_ushort_radixEPKciPvi_ZN7pcrecpp3Arg15parse_int_radixEPKciPvi_ZN7pcrecpp3Arg16parse_uint_radixEPKciPvi_ZN7pcrecpp3Arg20parse_longlong_radixEPKciPvi_ZN7pcrecpp3Arg21parse_ulonglong_radixEPKciPvi_ZN7pcrecpp3Arg12parse_doubleEPKciPv_ZN7pcrecpp3Arg11parse_floatEPKciPv_ZN7pcrecpp3Arg11parse_shortEPKciPv_ZN7pcrecpp3Arg15parse_short_hexEPKciPv_ZN7pcrecpp3Arg17parse_short_octalEPKciPv_ZN7pcrecpp3Arg18parse_short_cradixEPKciPv_ZN7pcrecpp3Arg12parse_ushortEPKciPv_ZN7pcrecpp3Arg16parse_ushort_hexEPKciPv_ZN7pcrecpp3Arg18parse_ushort_octalEPKciPv_ZN7pcrecpp3Arg19parse_ushort_cradixEPKciPv_ZN7pcrecpp3Arg9parse_intEPKciPv_ZN7pcrecpp3Arg13parse_int_hexEPKciPv_ZN7pcrecpp3Arg15parse_int_octalEPKciPv_ZN7pcrecpp3Arg16parse_int_cradixEPKciPv_
ZN7pcrecpp3Arg10parse_uintEPKciPv_ZN7pcrecpp3Arg14parse_uint_hexEPKciPv_ZN7pcrecpp3Arg16parse_uint_octalEPKciPv_ZN7pcrecpp3Arg17parse_uint_cradixEPKciPv_ZN7pcrecpp3Arg10parse_longEPKciPv_ZN7pcrecpp3Arg14parse_long_hexEPKciPv_ZN7pcrecpp3Arg16parse_long_octalEPKciPv_ZN7pcrecpp3Arg17parse_long_cradixEPKciPv_ZN7pcrecpp3Arg11parse_ulongEPKciPv_ZN7pcrecpp3Arg15parse_ulong_hexEPKciPv_ZN7pcrecpp3Arg17parse_ulong_octalEPKciPv_ZN7pcrecpp3Arg18parse_ulong_cradixEPKciPv_ZN7pcrecpp3Arg14parse_longlongEPKciPv_ZN7pcrecpp3Arg18parse_longlong_hexEPKciPv_ZN7pcrecpp3Arg20parse_longlong_octalEPKciPv_ZN7pcrecpp3Arg21parse_longlong_cradixEPKciPv_ZN7pcrecpp3Arg15parse_ulonglongEPKciPv_ZN7pcrecpp3Arg19parse_ulonglong_hexEPKciPv_ZN7pcrecpp3Arg21parse_ulonglong_octalEPKciPv_ZN7pcrecpp3Arg22parse_ulonglong_cradixEPKciPv_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_mutateEmmPKcm_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcm_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm_ZN7pcrecpp3Arg12parse_stringEPKciPv_ZNK7pcrecpp2RE7RewriteEPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKNS_11StringPieceESA_Pii_ZNK7pcrecpp2RE7ReplaceERKNS_11StringPieceEPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE_ZNK7pcrecpp2RE7ExtractERKNS_11StringPieceES3_PNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE_ZNK7pcrecpp2RE13GlobalReplaceERKNS_11StringPieceEPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE_ZN7pcrecpp2RE9QuoteMetaB5cxx11ERKNS_11StringPieceE_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag_ZN7pcrecpp2RE7CompileENS0_6AnchorE_ZN7pcrecpp2RE4InitERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPKNS_10RE_OptionsE_ZN7pcrecpp6no_argE_ZN7pcrecpp7ScannerC2Ev_ZN7pcrecpp7ScannerC1Ev_ZN7pcrecpp7ScannerC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE_ZN7pcrecpp7ScannerC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE_ZN7pcrecpp7ScannerD2Ev_ZN7pcrecpp7ScannerD1Ev_ZN7pc
recpp7Scanner11DisableSkipEv_ZNK7pcrecpp7Scanner10LineNumberEv_ZNK7pcrecpp7Scanner6OffsetEv_ZNK7pcrecpp7Scanner9LookingAtERKNS_2REE_ZNSt6vectorIN7pcrecpp11StringPieceESaIS1_EE17_M_realloc_insertIJRKS1_EEEvN9__gnu_cxx17__normal_iteratorIPS1_S3_EEDpOT__ZN7pcrecpp7Scanner11GetCommentsEiiPSt6vectorINS_11StringPieceESaIS2_EE_ZN7pcrecpp7Scanner15GetNextCommentsEPSt6vectorINS_11StringPieceESaIS2_EE_ZNSt6vectorIN7pcrecpp11StringPieceESaIS1_EE17_M_realloc_insertIJS1_EEEvN9__gnu_cxx17__normal_iteratorIPS1_S3_EEDpOT__ZN7pcrecpp7Scanner11ConsumeSkipEv_ZN7pcrecpp7Scanner17SetSkipExpressionEPKcDW.ref.__gxx_personality_v0_ZN7pcrecpp7Scanner4SkipEPKc_ZN7pcrecpp7Scanner10EnableSkipEv_ZN7pcrecpp7Scanner7ConsumeERKNS_2REERKNS_3ArgES6_S6__ZlsRSoRKN7pcrecpp11StringPieceEDW.ref.__gxx_personality_v0//                                              20        `
pcre_stringpiece.o/
pcrecpp.o/      1575493209  1667  135   100644  47024     `
ELF>p�@@ED !"#$%&'()*,-./013456:;=��H�����DAUATI��UH��S��H����~�>���H���uLc�C�,�Ѓ�0��	wH��� ~)H��[]A\A]�@��ߍH�L��w�H��� �L��L��H���B�D-H��[]A\A]�Df.�H��dH�%(H�D$1���%ptH�T$dH3%(u}H���fDH�t$���D$��
t0��
t=

t4���t?���uI�P���f�� �f��0�f��@�s����H�
�wH�5H�=�f�AVH��A��AUI��ATUHc�H��S��H��@dH�%(H�L$81�H�|$�Y���H���D��H��H�t$�I��H��1�H9\$t$H�L$8dH3%(��u1H��@[]A\A]A^�DA�$��u�M��tI�E��ĺ��@AVH��AUA��ATI��UHc�H��S��H��@dH�%(H�L$81�H�|$����8-t+H���H��H�t$D���I��H��H9\$t)1�H�L$8dH3%(u>H��@[]A\A]A^�f.�A���u�M��tI�$���f����@AVH��A��AUI��ATUHc�H��S��H��@dH�%(H�L$81�H�|$���H���D��H��H�t$�I��H��1�H9\$t$H�L$8dH3%(��u1H��@[]A\A]A^�DA�$��u�M��tI�E��ĺ��@AVH��AUA��ATI��UHc�H��S��H��@dH�%(H�L$81�H�|$�9����8-t+H���H��H�t$D���I��H��H9\$t)1�H�L$8dH3%(u>H��@[]A\A]A^�f.�A���u�M��tI�$���f����@AU��I��ATUHc�H��SH��H���dH�%(H��$�1�H�\$H����D,�H��H�t$H��I���1�H9\$t&H��$�dH3%(u:H���[]A\A]�DA�$��u�M��t�AE��fD���@H�H��xTH9�v#H�H9�sH��x&H�7H�~�f.�H�x��H��������H�H���PH�=��fD��SH��H�0H��t�H�{8H��t�H�[@H�H9�t*H��t%H�;H�CH9�t�H�߾ [�fD[�@f.���SH��H���H�{�H9�t[�[�@f.���SH��@L�W8dH�%(H�D$81���LDW0�\$PM����HcG A��H�$H�D$H�D$H�D$H�D$ H�D$(��~
H�$H�D$HcG$��~
H�$H�D$(1��ɋNL�������€�E��E��D�H�H�5H��HD�H��SAQA���ZY��xu������	�1�H�\$8dH3%(uH��@[���f.���H��H�8dH�%(H�D$1�H��t.1�H�L$����u%�D$H�T$dH3%(uH��ø�������H�
��H�5H�=�fD��AWAVI��C�LIAUATUSH���D$HL�d$@9���H����M��1�PD��A�M��I��H���ZY����t_A�D$A���tjM��teL���9�B��~U�E�I��I�l���I��I��L9�t3Ic|$�I�EA�4$H�)�H;�P��u�H��1�[]A\A]A^A_�fDH���[]A\A]A^A_�H�
�7H�5H�=�H�
�5H�5H�=�@f.���AWAVAUATUSH��H��$L��$�L��$�H��$�H�$H��$H��$�L��$�H�D$H��$ L��$�L��$H�D$H��$(L��$H�D$dH�%(H��$�1�H�H9��eH�T$0H9��_H�L$8I9��YL�D$@I9���L�L$HI9��ML�T$PI9��OL�\$XH9��QH�\$`H9��sH�l$hI9��EL�d$pI9��gL�l$xI9��iL��$�I9��hH�$L��$�H9��cH��$�H�\$H9��]H��$�H�\$H9��WH��$�H�\$H9���H��$�A���A�f.�H�L$,j3�H��$�PL�D$@�ZYH��$�dH3%(��H�Ę[]A\A]A^A_�fDE1��A��A���A���A��u���DA��e���DA��U���A��J���f.�A��5���DA�	�%���DA�
����DA�����DA����DA�
���DA��������AWAVAUATUSH��H��$L��$�L��$�H��$�H�$H��$H��$�L��$�H�D$H��$ L��$�L��$H�D$H��$(L��$H�D$dH�%(H��$�1�H�H9��eH�T$0H9��_H�L$8I9��YL�D$@I9���L�L$HI9��ML�T$PI9��OL�\$XH9��QH�\$`H9��sH�l$hI9��EL�d$pI9��gL�l$xI9��iL��$�I9��hH�$L��$�H9��cH��$�H�\$H9��]H��$�H�\$H9��WH��$�H�\$H9���H��$�A���A�f.�H�L$,j31�H��$�PL�D$@�ZYH��$�dH3%(��H�Ę[]A\A]A^A_�f�E1��A��A���A���A��u���DA��e���DA��U���A��J���f.�A��5���DA�	�%���DA�
����DA�����DA����DA�
���DA��������AWAVAUATUSH��H��H��$H��$�L��$�L��$�H�$H��$H��$�L��$�H�D$H��$ L��$�L��$H�D$H��$(L��$H�D$dH�%(H��$�1�H�H9��rH�T$0H9��lH�L$8I9��fL�D$@I9���L�L$HH9��ZH�t$PI9��\L�T$XI9��^L�\$`H9���H�l$hI9��RL�d$pI9��tL�l$xI9��vL��$�I9��uH�4$L��$�H9��pH��$�H�t$H9��jH��$�H�t$H9��dH��$�H�t$H9���H��$�A��fDA�f�H�L$,j3�H��H��$�PL�D$@�ZY��tHcL$,H)KH��$�dH3%(��H�Ę[]A\A]A^A_�@E1��A��A���A��u���DA��e���DA��U���DA��E���A��:���f.�A��%���DA�	����DA�
����DA����DA����DA�
����DA��������AWAVAUATUSH��H��H��$H��$�L��$�L��$�H�$H��$H��$�L��$�H�D$H��$ L��$�L��$H�D$H��$(L��$H�D$dH�%(H��$�1�H�H9��rH�T$0H9��lH�L$8I9��fL�D$@I9���L�L$HH9��ZH�t$PI9��\L�T$XI9��^L�\$`H9���H�l$hI9��RL�d$pI9��tL�l$xI9��vL��$�I9��uH�4$L��$�H9��pH��$�H�t$H9��jH��$�H�t$H9��dH��$�H�t$H9���H��$�A��fDA�f�H�L$,j31�H��H��$�PL�D$@�ZY��tHcL$,H)KH��$�dH3%(��H�Ę[]A\A]A^A_��E1��A��A���A��u���DA��e���DA��U���DA��E���A��:���f.�A��%���DA�	����DA�
����DA����DA����DA�
����DA��������AWAVAUATUSH��xdH�%(H�D$h1�E����C�\IH��I��A��I�΃�7SH�D$P���XZH�T$hdH3%(��uwH��x[]A\A]A^A_�fDHc�D�L$H��L�$�SL��L��PD�L$I��D��L�D$H���L�����Y^�H�
�[H�5H�=�����H��tH�:�r�Ðf.���1���uH��t����D��f.���1���uH��t����D��f.�����u1��D�;��f.�����u1��D����f.���SH��dH�%(H�D$1���u1�H�\$dH3%(u6H��[�H��H�������t�H�$H���H����w�H��t�f���fD��SH��dH�%(H�D$1���u1�H�\$dH3%(u/H��[�H��H���
���t�H�$H����w�H��t�f����f�f.���SH��dH�%(H�D$1���u1�H�\$dH3%(u7H��[�H��H�������t�H�$�������H�H9�w�H��t�����@��SH��dH�%(H�D$1���u1�H�\$dH3%(u/H��[�H��H���-���t�H�$�����H9�w�H��tʼn���f�f.�����u1��D���f.�����u1��D�;��f.�����t���~1��D����f���SH��dH�%(H�D$1���t���~1�H�L$dH3%(u,H��[�H��H������t�H��t�f��Z$��������
�b���f�����R���f�����B���f���1��5����@���
���f�������f�����r���f���1��e����@���
����f�������f�������f���1�����@���
��f�������f��������f���1������@����u1��D�
�f�fD����u1��D��F�fD����u1��D��&�fD����u1��D1��	�������u1��D�
��fD����u1��D��v�fD����u1��D��V�fD����u1��D1��9�������u1��D�
���fD����u1��D���fD����u1��D���fD����u1��D1��y�������u1��D�
��fD����u1��D����fD����u1��D����fD����u1��D1�������H��t'H��H��H�RH��Lc�H��1���H���f���f.���AWAVAUATM��UH��SH��H�D�L$LcJH�L$N�4I9�w5�x@�S�BЃ�	w|;D$�r�H�Ic4�����H��I9�vE�L�{<\t�H�]H�UH�ML�kH9���H�MI9�w|�H�EL�m�DL��I9�w�H���[]A\A]A^A_Ã�\��L�mH�UH�MM�}H9���H�MI9���B�*\H�UH��L�}B�D*�W���D1�A�1�H��H��D$�H�U�D$�]���f�A�T�H�D$)�H0H��������H+EHc�H9�wlH������fD��
���fD1�A�1�L��H���H�U�I���f.���,���fDH��1�[]A\A]A^A_�H�=��f���AW1�A�AVAUI��ATU1�SH��H��(H�t$L�|$@L�t$H��M��L��dH�%(H��$ 1�H�H�D$H�B1҉D$ j3�ZY��u0H��$dH3%(����H��([]A\A]A^A_�DL�d$ A��M��L��I�T$L��L���D$0H�T$ H�H�D$(H�T$H�S�T$H�T$��ń�t@�D$@��xY�T$D��xpH�{)�Hc�L�D$(Hc�H�L$ H��H)�H9�HG�H9�wgH���H�|$ I��L9��2�����(����H�
�VH�5H�=�H�
�WH�5H�=�H��H��H�=1�H�5���H���@f.���AVA�I��1�AUI��ATUH��1�SH��H��H���dH�%(H��$�1�I��H��j3M���A��X1�ZE��t"I�M��H��L��I�FL��H����H��$�dH3%(uH���[]A\A]A^��f.���UH��AWAVAUATSH��XH������H������dH�%(H�E�1�H�����ƅ��H������H��H�����H�BHDž������gH�����I��1�H������E1�H������Dž����H��������H�������L��L������j3H�������D��L������E1��^A��_D���dD���D���A9��E9��TH������D��Hc�)�H�OHc�H��H)�H9�HG�H�H9���H������H��H������L������E��L��H������H������H�H������H������H�C������H�K������E9�����D9��hL������M�E�����D��H��L������1�L��j3H������L������A���������ZA��Y���������������H�����H������H��H9�t�H�M�dH3%(�������nH�e�[A\A]A^A_]�I�MA�\$��9���Mc�I9���H������H�0B�<.
�<A�G)uCD9���I���"������A�(=0�0H������H�H��A�\$��tx9�}tHc�H9��SH������H�6�I��%�=���H��������Hc�I���&H9���<H�����������9��L��������D9�9������H������H�0�6���f�H������Mc�H�0��H��D)�L)�Hc�H9�HG�I9��H������L��H������H�@��9��O���H������L�����f�Hc�H9��a�<
A�(��������������DL��������A9�������f���f�H������H�8H������H������H�����L�����H��H��H9���L����H9���H������H�����H�H�FH����H������L�HH������H�����L�@�~���H������H�HH�89��o���Hc�H��H)�H9���H�4H�������H������H�8H�H�:����;��A�(=@������'��H������A�(H�I��=P��������fDH9�tYH������fo���H�����H�BH�2JH�����,���H������H�����H���oVH�)�����)������M��t,fo���H��tRH������H���ofF)�������H��H������H������H�@�@�U���Dž�����F���H������@L�@�2���H��H�=1�H�5�H�
��H�5H�=�L�H�
1��+H�5H�=��fDH�
��H�5H�=�H��H�5H�=1��L��H�5H�=1�����H�����AWH�GAVAUATUSH��H��H�H�D$�G�FH�G����I��1�E1��dfDD��H�M�u��߃�A<vA�D$�<	vE��x
A��_��H9T$��H�CI9�wxF�$*H�L�sB�0H��A9o~FL�kI�D�$(E��u�H��������L)�H��� �H�5H���H��A9o�H��H��[]A\A]A^A_�fDA�1�1�L��H���H��k�����Y���fDH9T$��H�CI9�wlB�*\H�L�sB�0I�L�cD�4(H�M�l$H9D$t`H�SI9�wF�4 H�L�kB�D �	����A�1�1�L��H���H���fDA�1�1�L��H���H��w�����f���U���H�=���H���@f.���AUATUSH��H��H�o(dH�%(H�D$81���tGH�?��H�L$H�T$E1��H��H����H�L$8dH3%(H���H��H[]A\A]�f�L�d$H�H�r�I�D$L��H�D$�H�SH�3L���H��������H+D$H�����H�5L���H�|$H�L$E1���H�T$�H�|$I��H��L9��G����H���B���fDH�H9C@�+���� �L�l$H�����I��H�@I�$M��t
L���I�TL��L���L�c@����H�=���H�����H����@f.���AWAVAUATI��USH��H��dH�%(H�D$1�H9�t7L�/L�I��H�nM9��H�wH9���H����H�kA�D-M����I�$H�C A�D$�C(H�H�C01�H��H�C@H�C8�H�C8H��t�H���H�C0H�D$dH3%(��H��[]A\A]A^A_ÐI�6H��tL��H���L�+�^���@H��H�,$���H�;I��I9�t�H�$L�+H�CH���*����fDH�H�C ��C(�,��������fD�A�EL�+����
GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA*FORTIFYGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA+GLIBCXX_ASSERTIONS
GA$3h864
GA$3h864GA*GA!stack_realignGA!stack_clashGA*cf_protectionGA*
GA*GOW�DGA!��H�H�GH9�tH�����pcrecpp.ccbasic_string::_M_createpcre_retval == 0(1 + n) * 3 <= vecsizematches >= 0n >= 0basic_string::appendvec[0] >= 0vec[1] >= 0basic_string::replace__pos <= size()matchstart >= startmatchend >= matchstart\0(?:)\zNULL == "Unexpected return value from pcre_config(NEWLINE)"%s: __pos (which is %zu) > this->size() (which is %zu)/usr/include/c++/8/bits/basic_string.h%s:%d: %s: Assertion '%s' failed.
����e����������
�����������;W�K�������� GA*GA!stack_realignGA!stack_clashGA*cf_protectionGA*
GA*GOW�DGA!��AWAVM��AUATL�gUH��SH��H��8H�wdH�%(H�D$(1�H�DI��H�D$I)�L��H)�H�H�D$ L;'�H�wH�|$ H�L$�H��L�H�L$I��t,H����L��H��H��H�L$L�D$�H�L$L�D$H��t(M��t#I�</I����L��H��L�D$�L�D$M��t(H�t$J�|5L�L�I��t_L��L�D$�L�D$M9�tL���H�D$ L�;H�CH�D$(dH3%(uNH��8[]A\A]A^A_�DA�A��]���@���f����fD���\����GA*GA!stack_realignGA!stack_clashGA*cf_protectionGA*
GA*GOW�DGA!��USH��L�CH��H�GH�?H�,L9�tOL�CL9�w.H��tH�H��tH�H�;H�kH���/H��[]�fDI��H��H��1�H���H�;��A�����H�;�GA*GA!stack_realignGA!stack_clashGA*cf_protectionGA*
GA*GOW�DGA!��H��������AWAVAUATUH��SH�H��(H�WH)�I9���M��H�H��H�I)�N�$2H9���H�{L9��dL�<0I��H�I)�A��L9�@��A!�H9�vtE��t.I�4/K�<I���ML��L�D$H�L$�L�D$H�L$M��tI���L��H��L���H�L�cB� H��(H��[]A\A]A^A_�fDH�H9�w�M���L9���I����L��H��L��L�L$L�D$H�L$�E��H�L$L�D$L�L$t�I�4/K�<I����L��L�D$H�L$�L�D$H�L$L9��M���L�J�H9���H9���J�41I���L��L�������f.�H��H�������A���D������fD��Y���fD�A�E���g����4���@I��t�L��H��L����������4���fDH)�H��tDH��tH��H��L��L�D$�L�D$L��K�4I�</H)�H��tH���A�����7����A������%����A�����H�=�basic_string::_M_replaceH�|$ I��L9�t�H����H������H�����H��H9�t�H���H�;H9|$t�H����L�� �H���H�|$I��L9�t�H���GA*GA!stack_realignGA!stack_clashGA*cf_protectionGA*
GA*GOW�DGA!��ATI��UH��SH��dH�%(H�D$1�H��t	H����L)�H��H�$H��wPH�EH��u6A�$�H�$H�EH�]�H�D$dH3%(uYH��[]A\�fDH��t��f�H��1��H�$H�EH�UH��L��H���H�$H�E�H�=��basic_string::_M_construct null not validGA*GA!stack_realignGA!stack_clashGA*cf_protectionGA*
GA*GOW�DGA!��H�H��H�H�H�H�H�=H�p�H�H���H��H���int pcrecpp::RE::NumberOfCapturingGroups() constbool pcrecpp::RE::DoMatch(const pcrecpp::StringPiece&, pcrecpp::RE::Anchor, int*, const pcrecpp::Arg* const*, int) constbool pcrecpp::RE::DoMatchImpl(const pcrecpp::StringPiece&, pcrecpp::RE::Anchor, int*, const pcrecpp::Arg* const*, int, int*, int) conststd::__cxx11::basic_string<_CharT, _Traits, _Alloc>::reference std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::operator[](std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::size_type) [with _CharT = char; _Traits = std::char_traits<char>; _Alloc = std::allocator<char>; std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::reference = char&; std::__cxx11::basic_string<_CharT, _Traits, _Alloc>::size_type = long unsigned int]int pcrecpp::NewlineMode(int)int pcrecpp::RE::GlobalReplace(const pcrecpp::StringPiece&, std::__cxx11::string*) constbool pcrecpp::RE::Replace(const pcrecpp::StringPiece&, std::__cxx11::string*) constGCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�zRx�0!HD�B�B�D �D(�F0{
(A ABBEr(A ABB��D m
G@��B�H�E �A(�G0�Fp]
0A(A BBBF@��B�E�E �D(�G0�Fp`
0A(A BBBK@4�B�H�E �A(�G0�Fp]
0A(A BBBF@x�B�E�E �D(�G0�Fp`
0A(A BBBK8��B�J�A �G(�J�f
(A ABBF�i]bE�P
KAzPLRx�� $"E�V
EA,tE�DP�XB`IXAPn
AA�zH F
Al�"F�B�J �B(�A0�A8�D@UHHPXHA@m
8C0A(B BBBGD
8F0A(B BBBA\0�F�B�B �B(�A0�A8�G���N�K�A�^
8A0A(B BBBG\��F�B�B �B(�A0�A8�G���K�K�A�^
8A0A(B BBBJ\�F�B�B �B(�A0�A8�J���Q�K�A�m
8A0A(B BBBE\PF�B�B �B(�A0�A8�J���N�K�A�m
8A0A(B BBBHh��F�B�B �B(�A0�A8�D�p�F�H�A�V
8A0A(B BBBGV�G�c�A�0&D&Xl �jE�D j
AD �cE�D j
AD �kE�D j
AD �cE�D j
AD$8 LhE�D r
ADp�������$8L`t������(<Pdx�����H�F�B�E �B(�E0�D8�Gp
8A0A(B BBBF(<	�E�A�K ~
AAGHh	�P�B�B �B(�A0�D8�G`�
8D0A(B BBBG�	6M`d�	�F�B�B �B(�D0�D8�DP�
8F0A(B BBBA
8C0A(B BBBA\�F�J�B �E(�A0�C8�J�S�m�F�A�d
8A0A(B BBBF,zPLRx���������4P�
�F�M�E �A(�F0�M�Z�B�L�C�A
0A(A BBBA<	�E�C
P������.n.�.c
AL.,zPLRx��������4$L�	F�F�B �B(�A0�A8�GP�
8D0A(B BBBG(zPLRx��P������00L�F�D�D �D0c
 AABG<T
�F�B�A �A(�GpV
(A ABBC$zPLRx��p����,0H
|F�B�B �B(�D0�A8�GP�
8A0A(B BBBBL
rOb|,-Ea~��!/�T��p9 ����0���>��u`�� i�  
91L9���9@y!g!!�%%�%a)�)�)9�T�,�9@Yo9`��,$0,@l0�0�0X	,X0�	�	5�	5r5
5r9,1
6
;
@
E
$J
LO
5T
YY
`^
ud
�j
@p
�v
�|
��
x�
��
��
��
��
��
+�
2�
�
�
&	:	
#'+.237:<=>?@Kn"!���"!�+9JQY`hu|���b���"�!:��"

0K
U
@z�
�
�"�
��
C�����x��������&&=0gP�pj��c�Pk�c<0jP�p��h�. X0�@�P�`�p(�I�o�������
�3�]� �@�`���F�p��� �@`H�p������"!@"%��")���6�@�b ������Y� ����P'"0�j�`)����+|)6.annobin_pcrecpp.cc.annobin_pcrecpp.cc_end.annobin_pcrecpp.cc.hot.annobin_pcrecpp.cc_end.hot.annobin_pcrecpp.cc.unlikely.annobin_pcrecpp.cc_end.unlikely.annobin__ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.start.annobin__ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.end_ZN7pcrecppL15TerminateNumberEPcPKci_ZN7pcrecppL11NewlineModeEi_ZZN7pcrecppL11NewlineModeEiE19__PRETTY_FUNCTION___ZN7pcrecpp3Arg16parse_long_radixEPKciPvi.part.14_ZN7pcrecpp3Arg17parse_ulong_radixEPKciPvi.part.15_ZN7pcrecpp3Arg20parse_longlong_radixEPKciPvi.part.16_ZN7pcrecpp3Arg21parse_ulonglong_radixEPKciPvi.part.17_ZN7pcrecpp3Arg12parse_doubleEPKciPv.part.18_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm.isra.37_ZN7pcrecppL12empty_stringE_ZZNK7pcrecpp2RE23NumberOfCapturingGroupsEvE19__PRETTY_FUNCTION___ZZNK7pcrecpp2RE11DoMatchImplERKNS_11StringPieceENS0_6AnchorEPiPKPKNS_3ArgEiS5_iE19__PRETTY_FUNCTION___ZZNK7pcrecpp2RE7DoMatchERKNS_11StringPieceENS0_6AnchorEPiPKPKNS_3ArgEiE19__PRETTY_FUNCTION__.annobin__ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_mutateEmmPKcm.start.annobin__ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_mutateEmmPKcm.end.annobin__ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcm.start.annobin__ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcm.end.annobin__ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm.start.annobin__ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm.end_ZZNK7pcrecpp2RE7ReplaceERKNS_11StringPieceEPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE19__PRETTY_FUNCTION___ZNK7pcrecpp2RE7ReplaceERKNS_11StringPieceEPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.cold.44_ZZNK7pcrecpp2RE13GlobalReplaceERKNS_11StringPieceEPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEE19__PRETTY_FUNCTION___ZZNSt7__cxx1112basic_
stringIcSt11char_traitsIcESaIcEEixEmE19__PRETTY_FUNCTION___ZNK7pcrecpp2RE13GlobalReplaceERKNS_11StringPieceEPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE.cold.45_ZN7pcrecpp2RE9QuoteMetaB5cxx11ERKNS_11StringPieceE.cold.46.annobin__ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.start.annobin__ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.end_ZN7pcrecpp2RE7CompileENS0_6AnchorE.cold.47_ZN7pcrecppL15default_optionsE.annobin__GLOBAL__sub_I__ZN7pcrecpp2RE6no_argE.start.annobin__GLOBAL__sub_I__ZN7pcrecpp2RE6no_argE.end_GLOBAL__sub_I__ZN7pcrecpp2RE6no_argE.LC0.LC1.LC2.LC3.LC4.LC6.LC5.LC7.LC9.LC10.LC11.LC13.LC12.LC18.LC15.LC16.LC17.LC19.LC21.LC24.LC25.LC8.LC23.text.hot.group.text.unlikely.group_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED5Ev.text.unlikely..group.text.startup.group.text.hot..group_ZN7pcrecpp3Arg10parse_nullEPKciPv_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev_GLOBAL_OFFSET_TABLE__ZdlPv_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED1Evisspacememcpypcre_config__stack_chk_fail__assert_fail__errno_locationstrtolstrtoulstrtoqstrtouq__memcpy_chkstrtod_Znwm_ZSt20__throw_length_errorPKc_ZN7pcrecpp2RE7CleanupEvpcre_free_ZdlPvm_ZN7pcrecpp2RED2EvDW.ref.__gxx_personality_v0_ZN7pcrecpp2RED1Ev_ZNK7pcrecpp2RE8TryMatchERKNS_11StringPieceEiNS0_6AnchorEbPiipcre_exec_ZNK7pcrecpp2RE23NumberOfCapturingGroupsEvpcre_fullinfo_ZNK7pcrecpp2RE11DoMatchImplERKNS_11StringPieceENS0_6AnchorEPiPKPKNS_3ArgEiS5_i_ZNK7pcrecpp2RE9FullMatchERKNS_11StringPieceERKNS_3ArgES6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6__ZNK7pcrecpp2RE12PartialMatchERKNS_11StringPieceERKNS_3ArgES6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6_S6__ZNK7pcrecpp2RE7ConsumeEPNS_11StringPieceERKNS_3ArgES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5__ZNK7pcrecpp2RE14FindAndConsumeEPNS_11StringPieceERKNS_3ArgES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5__ZNK7pcrecpp2RE7DoMatchERKNS_11StringP
ieceENS0_6AnchorEPiPKPKNS_3ArgEi_Znam_ZdaPv_ZN7pcrecpp3Arg17parse_stringpieceEPKciPv_ZN7pcrecpp3Arg10parse_charEPKciPv_ZN7pcrecpp3Arg11parse_ucharEPKciPv_ZN7pcrecpp3Arg16parse_long_radixEPKciPvi_ZN7pcrecpp3Arg17parse_ulong_radixEPKciPvi_ZN7pcrecpp3Arg17parse_short_radixEPKciPvi_ZN7pcrecpp3Arg18parse_ushort_radixEPKciPvi_ZN7pcrecpp3Arg15parse_int_radixEPKciPvi_ZN7pcrecpp3Arg16parse_uint_radixEPKciPvi_ZN7pcrecpp3Arg20parse_longlong_radixEPKciPvi_ZN7pcrecpp3Arg21parse_ulonglong_radixEPKciPvi_ZN7pcrecpp3Arg12parse_doubleEPKciPv_ZN7pcrecpp3Arg11parse_floatEPKciPv_ZN7pcrecpp3Arg11parse_shortEPKciPv_ZN7pcrecpp3Arg15parse_short_hexEPKciPv_ZN7pcrecpp3Arg17parse_short_octalEPKciPv_ZN7pcrecpp3Arg18parse_short_cradixEPKciPv_ZN7pcrecpp3Arg12parse_ushortEPKciPv_ZN7pcrecpp3Arg16parse_ushort_hexEPKciPv_ZN7pcrecpp3Arg18parse_ushort_octalEPKciPv_ZN7pcrecpp3Arg19parse_ushort_cradixEPKciPv_ZN7pcrecpp3Arg9parse_intEPKciPv_ZN7pcrecpp3Arg13parse_int_hexEPKciPv_ZN7pcrecpp3Arg15parse_int_octalEPKciPv_ZN7pcrecpp3Arg16parse_int_cradixEPKciPv_ZN7pcrecpp3Arg10parse_uintEPKciPv_ZN7pcrecpp3Arg14parse_uint_hexEPKciPv_ZN7pcrecpp3Arg16parse_uint_octalEPKciPv_ZN7pcrecpp3Arg17parse_uint_cradixEPKciPv_ZN7pcrecpp3Arg10parse_longEPKciPv_ZN7pcrecpp3Arg14parse_long_hexEPKciPv_ZN7pcrecpp3Arg16parse_long_octalEPKciPv_ZN7pcrecpp3Arg17parse_long_cradixEPKciPv_ZN7pcrecpp3Arg11parse_ulongEPKciPv_ZN7pcrecpp3Arg15parse_ulong_hexEPKciPv_ZN7pcrecpp3Arg17parse_ulong_octalEPKciPv_ZN7pcrecpp3Arg18parse_ulong_cradixEPKciPv_ZN7pcrecpp3Arg14parse_longlongEPKciPv_ZN7pcrecpp3Arg18parse_longlong_hexEPKciPv_ZN7pcrecpp3Arg20parse_longlong_octalEPKciPv_ZN7pcrecpp3Arg21parse_longlong_cradixEPKciPv_ZN7pcrecpp3Arg15parse_ulonglongEPKciPv_ZN7pcrecpp3Arg19parse_ulonglong_hexEPKciPv_ZN7pcrecpp3Arg21parse_ulonglong_octalEPKciPv_ZN7pcrecpp3Arg22parse_ulonglong_cradixEPKciPv_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_mutateEmmPKcm_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcm_ZNSt7__cxx1112basic_string
IcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcmmemmove_ZN7pcrecpp3Arg12parse_stringEPKciPv_ZNK7pcrecpp2RE7RewriteEPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEERKNS_11StringPieceESA_Pii_ZNK7pcrecpp2RE7ReplaceERKNS_11StringPieceEPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE_ZSt24__throw_out_of_range_fmtPKcz_Unwind_Resume_ZNK7pcrecpp2RE7ExtractERKNS_11StringPieceES3_PNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE_ZNK7pcrecpp2RE13GlobalReplaceERKNS_11StringPieceEPNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEprintfabort_ZN7pcrecpp2RE9QuoteMetaB5cxx11ERKNS_11StringPieceE_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag_ZSt19__throw_logic_errorPKc_ZN7pcrecpp2RE7CompileENS0_6AnchorEpcre_compilestrlen_ZN7pcrecpp2RE4InitERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPKNS_10RE_OptionsE__dso_handle__cxa_atexit_ZN7pcrecpp6no_argE*x��������36��������P6��������y6���������y���������z��������[{��������b4n7��������u8��������z|���������}���������~��������({��������p}������������������{��������+}��������G����������{���������}������������������X{�������������������}������������������{���������9����������������������������������������v��������
����������6������������������1{��������n����������{���������4���������7���������:���������|��������
���������/����������4��7���������;���������|���������4��7���������<���������|���������	����������
����������{��������~����������
����������{��������q��������������������{��������q��������������������{�����������������Y���������y��������������������4<�7���������=���������|���������{���������{��������?{���������{��������{���������{�������� ���������e�����������������������������>������������������������������������H���������_v��������i{��������p4�|7���������?���������|���������4��7���������@���������|���������A���������B������������������, ���������Z ���������� {��������h!����������!���������"���������}"����������"v��������&$���������Y%����������&A���������&>���������&����������&4<�&7���������&C���������&|���������&D���������&4\�&E���������&F���������&����������&����������&4<�&7��������'G��������'|��������'>��������'A��������#'���������-'>��������4'A��������;'���������@'{��������"(H��������*(���������a(����������(���������)���������=)>��������B)����������)����������)I���������)����������)���������"*J��������**���������C*���������]*v��������s*�*����������*����������*����������*{���������*>���������*����������+�+����������+����������+y��������,v��������;,E,x,{��������B���������U���������x�������������������v���������5��������L'5N)5<�*5T�*5i |,   v�������� i�y���������y���������y��������v��������{{��������  9y��������g���������" #�����������y��������-���������h����������y������������������(���������e����������y���������K������������������v�����������������4v��������<���������Kv��������S���������a���������i���������|v������������������+ ,��y���������L�������������������{��������2 2rs��������������������������$�+,2w��������=D N+T���������[e2� 4
H�����08�|��`� �?�T]x0�@��4�����T��� �4�H\0pP�p���P��0(P<pP�t�� �0�@�P�`p�(�<�P�d�x������� �@�`��,�@�Th |@�`���������@	!l	$�	�	@8
 A
�
��
5�
�
�4� =��5�&�P'�.�85@A9P-�`)�A���5X�`
+P
2.symtab.strtab.shstrtab.group.rela.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.rela.gnu.build.attributes.unlikely.rela.gnu.build.attributes..text._ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.rela.text._ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEED2Ev.rodata.str1.1.rodata.str1.8.gcc_except_table.rela.gnu.build.attributes..text._ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_mutateEmmPKcm.rela.text._ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_mutateEmmPKcm.rela.gnu.build.attributes..text._ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcm.rela.text._ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_appendEPKcm.rela.gnu.build.attributes..text._ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm.rela.text._ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm.rodata._ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEmmPKcm.str1.1.rela.text.unlikely.rela.gnu.build.attributes..text._ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.rela.text._ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.rodata._ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIPKcEEvT_S8_St20forward_iterator_tag.str1.8.rela.gnu.build.attributes..text.startup.rela.text.startup.rela.init_array.rodata.rela.data.rel.local.DW.ref.__gxx_personality_v0.comment.note.gnu.property.note.GNU-stack.rela.eh_frame@BMPBN`BOtB��B��B��BP�B��BQ�B��BR'|,"@��hB-|-3�-@ =�-�8@�0BST0bT0(]@�0B�|0�|0(|@H�0B��0��@x�0B�p1!�@��B@2�1�O2�2�^C3hu�3�p@��0B�p4�@��B!/�5�*@��0B#E�6��@��0B%�L7��@�0B'�8�J@�B)�2�:��:��@��B,p;�
@�0B.%@<��@8�xB02=*�<=�}@��0B3�>r�@� B5�x>�@�B7��>� �xB�@�B:0�B-S�B�BPC4C`
/@0�	B@`P�Cs	0eJ0�>pcre_scanner.o/ 1575493209  1667  135   100644  16792     `
ELF>X6@@-,"#%H�?H��x	H���PH�=�f���H�GH�GH�H�G 1��G�G(H�G0f�G8�G:H�G@�GH�f�f.���ATUSH��H��dH�%(H�D$1�H�GH�L�&H�nL��H�t	M����H�,$H��wxH��ubA�$�SH�k�(H�H�C0H�C H�C�C:�C(1�f�C8H�C@�CHH�D$dH3%(u\H��[]A\�f.�H��t��f�H������H�$H�H�SH��L��H���H�,$H��i���H�=��f���USH��H��H�o0H��tH����HH���H�k@H��tH�}H��t��H���H�;H��H9�tH��[]�f.�H��[]���H�0t�G8�PH�
�fH�5H�=���H�H�w �H9�s1ɀ:
��H���H9�u��@Ð@f.���H�G H+�@��H��H��E1�E1�dH�%(H�T$1�H�w �H�L$H���H�T$dH3%(uH�����@f.���L�G@M����AVAUATUSI�I;Xt\I��I��Hc�Lc��
H��I9XtDI�$H�H�4(H9�r�HcsL�H��H�H9�w�I�vI;vt*H�H��H���N�I�vI9Xu�[]A\A]A^�f�H��L���M�D$@�fDÐfD��H�G@H��t{ATUSHc_HH��HH;Xt]H��I���+fDH��SH��H��H�N�V�H�uA�D$HH9Xt*H�uH;uu�H��H��H���I�D$@A�D$HH9Xu�[]A\�@ÐfD��AVAUL�o ATI��USH�H��H�� L�w dH�%(H�D$1��f�A�|$9t-SI�|$0I��I��SH��H��L��SSSSSSSSSS�H��`��u�A�|$:tI�|$@tRI�D$ L)�H�D$dH3%(uhH�� []A\A]A^�f�I�|$@L�4$�D$H�wH;wt8L�6H���F�H�w뺿�H�H�@H�@I�D$@�H�����f���AWAVAUI��ATUH��SH��XH�_0dH�%(H�D$H1�H��tH����HH���M�����HL�d$ �I�T$L��L�x�@H��L�8H�@H�@ �@(H�T$ H�T$�H�T$H��I��H�D$��H����A�M�L$0H�D$(L��H���1��H�|$ I��L9�t��H�]0H��f�U8��@1�H�E0f�E8H�D$HdH3%(ukH��X[]A\A]A^A_�H�|$���H�L$H�D$ H�L$0L��L��H��H�L$�H�L$H�T$ H���N���f.�H���;���H��H������H�����H���f���AWAVAUI��ATUH��SH��XH�_0dH�%(H�D$H1�H��tH����HH���M�����HL�d$ �I�T$L��L�x�@H��L�8H�@H�@ �@(H�T$ H�T$�H�T$H��I��H�D$��H����A�M�L$0H�D$(L��H���1��H�|$ I��L9�t��H�]0H��f�U8��@1�H�E0f�E8H�D$HdH3%(ukH��X[]A\A]A^A_�H�|$���H�L$H�D$ H�L$0L��L��H��H�L$�H�L$H�T$ H���N���f.�H���;���H��H������H�����H���f���H�0t	�G8�|���PH�
�kH�5H�=�f�f.���SL�
H��H��H�s H��AQAQAQAQAQAQAQAQAQAQAQAQ�H��`��t�{8uH��[�DH�߈D$��D$H��[�
GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA*FORTIFYGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA+GLIBCXX_ASSERTIONS
GA$3h864
GA$3h864basic_string::_M_createpcre_scanner.ccskip_ != NULLbasic_string::_M_construct null not validGA*GA!stack_realignGA!stack_clashGA*cf_protectionGA*
GA*GOW�DGA!��AWH��AVAUATI��USH��H��H�oL�/H��L)�L)�H���+H�4I���H9���L��H�T$H�$�H�$H�T$I��I�H�@�rH�L�H��qL9�t;L��L��f�L�
D�BH��H��L�I�D�A�H9�u�H�C�L)�H���I�D H9�t;H��H��f�D�BL�
H��H��L�I�D�A�H9�u�H)�H�U�H���H�DM��tL��H�$�H�$M�4$M�|$I�D$H��[]A\A]A^A_�f�H��������H9�����H��u�E1�E1�������H��I������GA*GA!stack_realignGA!stack_clashGA*cf_protectionGA*
GA*GOW�DGA!��AWH��AVAUATI��USH��H��H�oL�/H��L)�L)�H���+H�4I���H9���L��H�T$H�$�H�$H�T$I��I�H�@�rH�L�H��qL9�t;L��L��f�L�
D�BH��H��L�I�D�A�H9�u�H�C�L)�H���I�D H9�t;H��H��f�D�BL�
H��H��L�I�D�A�H9�u�H)�H�U�H���H�DM��tL��H�$�H�$M�4$M�|$I�D$H��[]A\A]A^A_�f�H��������H9�����H��u�E1�E1�������H��I������H�|$ I��L9�t�H�;I9�t�H�߾H�H����H�|$ I��L9�t�H�;I9�t�H�߾H�H�����Y�������0��Y�������0void pcrecpp::Scanner::EnableSkip()void pcrecpp::Scanner::DisableSkip()GCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�zRx�R0D0D�F�A�A �G0�
 AABK0xwE�A�G R
AAODAA�0Q�1��QH C
AHqF�E�B �B(�D0�A8�GP
8A0A(B BBBCDP�S�B�B �A(�A0�f
(A BBBJX�����,��O�A�A �oABE���H�qF�E�B �B(�D0�A8�GP
8A0A(B BBBClF�B�F �D(�A0�NPhXL`JhApAxA�A�A�A�A�A�A�IPt
0A(A BBBCzPLRx��P$�F�B�B �E(�A0�D8�D�
8A0A(B BBBD,zPLRx���������45P��F�B�B �E(�A0�D8�D�
8A0A(B BBBD�5�4UP�gE�U B(B0B8B@BHBPBXB`BhBpBxB�I N
AFUA'	7Tu��!@%P�q\�q d5�65�!$!���(	.D
"$%&'(Ukq� D� D�p�0Ap��pw����pw��0 14`RpQ{�"q:������"qBe|� �		!"%	,		�	�	���	�4�	�g.annobin_pcre_scanner.cc.annobin_pcre_scanner.cc_end.annobin_pcre_scanner.cc.hot.annobin_pcre_scanner.cc_end.hot.annobin_pcre_scanner.cc.unlikely.annobin_pcre_scanner.cc_end.unlikely_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_createERmm.isra.40.constprop.54_ZZN7pcrecpp7Scanner11DisableSkipEvE19__PRETTY_FUNCTION__.annobin__ZNSt6vectorIN7pcrecpp11StringPieceESaIS1_EE17_M_realloc_insertIJRKS1_EEEvN9__gnu_cxx17__normal_iteratorIPS1_S3_EEDpOT_.start.annobin__ZNSt6vectorIN7pcrecpp11StringPieceESaIS1_EE17_M_realloc_insertIJRKS1_EEEvN9__gnu_cxx17__normal_iteratorIPS1_S3_EEDpOT_.end.annobin__ZNSt6vectorIN7pcrecpp11StringPieceESaIS1_EE17_M_realloc_insertIJS1_EEEvN9__gnu_cxx17__normal_iteratorIPS1_S3_EEDpOT_.start.annobin__ZNSt6vectorIN7pcrecpp11StringPieceESaIS1_EE17_M_realloc_insertIJS1_EEEvN9__gnu_cxx17__normal_iteratorIPS1_S3_EEDpOT_.end_ZN7pcrecpp7Scanner17SetSkipExpressionEPKc.cold.55_ZN7pcrecpp7Scanner4SkipEPKc.cold.56_ZZN7pcrecpp7Scanner10EnableSkipEvE19__PRETTY_FUNCTION__.LC0.LC1.LC2.LC3.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..group_GLOBAL_OFFSET_TABLE__Znwm_ZSt20__throw_length_errorPKc_ZN7pcrecpp7ScannerC2Ev_ZN7pcrecpp7ScannerC1Ev_ZN7pcrecpp7ScannerC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEmemcpy_ZSt19__throw_logic_errorPKc__stack_chk_fail_ZN7pcrecpp7ScannerC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE_ZN7pcrecpp7ScannerD2Ev_ZN7pcrecpp2RED1Ev_ZdlPvm_ZdlPv_ZN7pcrecpp7ScannerD1Ev_ZN7pcrecpp7Scanner11DisableSkipEv__assert_fail_ZNK7pcrecpp7Scanner10LineNumberEv_ZNK7pcrecpp7Scanner6OffsetEv_ZNK7pcrecpp7Scanner9LookingAtERKNS_2REE_ZNK7pcrecpp2RE7DoMatchERKNS_11StringPieceENS0_6AnchorEPiPKPKNS_3ArgEi_ZNSt6vectorIN7pcrecpp11StringPieceESaIS1_EE17_M_realloc_insertIJRKS1_EEEvN9__gnu_cxx17__normal_iteratorIPS1_S3_EEDpOT__ZN7pcrecpp7Scanner11GetCommentsEiiPSt6vectorINS_11StringPieceESaIS2_EE_ZN7pcrecpp7Scanner15GetNextCommentsEPS
t6vectorINS_11StringPieceESaIS2_EE_ZNSt6vectorIN7pcrecpp11StringPieceESaIS1_EE17_M_realloc_insertIJS1_EEEvN9__gnu_cxx17__normal_iteratorIPS1_S3_EEDpOT__ZN7pcrecpp7Scanner11ConsumeSkipEv_ZN7pcrecpp2RE6no_argE_ZNK7pcrecpp2RE7ConsumeEPNS_11StringPieceERKNS_3ArgES5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5_S5__ZN7pcrecpp7Scanner17SetSkipExpressionEPKcDW.ref.__gxx_personality_v0strlen_ZN7pcrecpp2RE4InitERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEPKNS_10RE_OptionsE_Unwind_Resume_ZN7pcrecpp7Scanner4SkipEPKc_ZN7pcrecpp7Scanner10EnableSkipEv_ZN7pcrecpp7Scanner7ConsumeERKNS_2REERKNS_3ArgES6_S6_��������8��������M<��������`��������e=��������j>���������A���������B���������C���������B��������<����������������F���������J���������>��������gK���������K��������&P��������yQ���������7��������N��������>��������UA��������bB��������z7���������T���������U��������	C��������O���������<���������>��������A��������B��������*7��������fT���������U���������C���������O��������0<��������b>������������������������������������F���������P���������Q��������	O��������
7���������C������������������nEz2 '	   
V7��������	C�������� V7��������	C��������C��������C��������)B��������1W��������EC��������RC��������_B��������gW��������V 4 Hp|p��� �`�pT�����S� �S,5D�M!�6�:����.symtab.strtab.shstrtab.group.rela.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.rela.gnu.build.attributes.unlikely.rodata.str1.1.rodata.str1.8.rela.gnu.build.attributes..text._ZNSt6vectorIN7pcrecpp11StringPieceESaIS1_EE17_M_realloc_insertIJRKS1_EEEvN9__gnu_cxx17__normal_iteratorIPS1_S3_EEDpOT_.rela.text._ZNSt6vectorIN7pcrecpp11StringPieceESaIS1_EE17_M_realloc_insertIJRKS1_EEEvN9__gnu_cxx17__normal_iteratorIPS1_S3_EEDpOT_.rela.gnu.build.attributes..text._ZNSt6vectorIN7pcrecpp11StringPieceESaIS1_EE17_M_realloc_insertIJS1_EEEvN9__gnu_cxx17__normal_iteratorIPS1_S3_EEDpOT_.rela.text._ZNSt6vectorIN7pcrecpp11StringPieceESaIS1_EE17_M_realloc_insertIJS1_EEEvN9__gnu_cxx17__normal_iteratorIPS1_S3_EEDpOT_.rela.text.unlikely.gcc_except_table.rodata.rela.data.rel.local.DW.ref.__gxx_personality_v0.comment.note.gnu.property.note.GNU-stack.rela.eh_frame@*P*`*Kt*N�*�*S�*'�'	"@8)*-�	3�	=�	�8@H.0*S�b�(]@x.0*����(|@�.0*�2�6�28*�d��@�.0*�0
qW@/0*����@8/0*�pqq@h/0*��k�@�/�*MB�e % @X0*"Q0-S=Z@Pm���}@p0X*(��+6	(
�2�/0              1575493209  1667  135   100644  7888      `
ELF>�@@)(
!��AUI��ATUSH��8L�&Hc^dH�%(H�D$(1�H��L��H�UH��H�H�$t	M����H��wsH��u]A�$�D$H�EH�\$L���H�T$H�4$�H�<$H��H��H9�t�H�L$(dH3%(H��uWH��8[]A\A]�H�EH��t��D��x:H�{�H�\$H�$H��L��H���H�$�s���H�=��H�=���H���
GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA*FORTIFYGA+GLIBCXX_ASSERTIONS
GA$3h864
GA$3h864basic_string::_M_construct null not validbasic_string::_M_createH�<$H��H9�t�H�����
u��B��GA*GA!stack_realignGA!stack_clashGA*cf_protectionGA*
GA*GOW�DGA!��H��H�=�H�=H��H�H�5�GCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�zPLRx��<$F�E�A �A(�D`�
(A ABBD$zPLRx��`����,zRx�2HW

?
`
����=2z2�
������	 !"#$�#!?U������**BO.annobin_pcre_stringpiece.cc.annobin_pcre_stringpiece.cc_end.annobin_pcre_stringpiece.cc.hot.annobin_pcre_stringpiece.cc_end.hot.annobin_pcre_stringpiece.cc.unlikely.annobin_pcre_stringpiece.cc_end.unlikely_ZlsRSoRKN7pcrecpp11StringPieceE.cold.23.annobin__GLOBAL__sub_I__ZlsRSoRKN7pcrecpp11StringPieceE.start.annobin__GLOBAL__sub_I__ZlsRSoRKN7pcrecpp11StringPieceE.end_GLOBAL__sub_I__ZlsRSoRKN7pcrecpp11StringPieceE_ZStL8__ioinit.LC0.LC1.text.hot.group.text.unlikely.group.text.unlikely..group.text.startup.group.text.hot..groupDW.ref.__gxx_personality_v0_GLOBAL_OFFSET_TABLE__ZSt16__ostream_insertIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_PKS3_l_ZdlPv_Znwmmemcpy_ZSt19__throw_logic_errorPKc__stack_chk_fail_ZSt20__throw_length_errorPKc_Unwind_Resume_ZNSt8ios_base4InitC1Ev_ZNSt8ios_base4InitD1Ev__dso_handle__cxa_atexitv1���������2���������3���������4������������������5���������6����������������7����������������   2��������9�������� 2��������:��������*;��������"<��������)��������.=��������8/(1s/���.symtab.strtab.shstrtab.group.rela.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.rela.gnu.build.attributes.unlikely.rodata.str1.8.rodata.str1.1.rela.text.unlikely.gcc_except_table.rela.gnu.build.attributes..text.startup.rela.text.startup.rela.init_array.rela.data.rel.local.DW.ref.__gxx_personality_v0.comment.note.gnu.property.note.GNU-stack.rela.eh_frame@&P&`&l&�&/�&'�"@��&-�3�=��8@�0&SLbL(]@�0&�t�t(|@�0&�2�*�2����@(0&�����@X0&��2
@��&% @&6 1@0&b0(-SUkXP~�����@H�&$x�'.	H\��usr/lib64/pkgconfig/libpcre.pc000064400000000476150403561430012220 0ustar00# Package Information for pkg-config

prefix=/opt/alt/pcre802/usr
exec_prefix=/opt/alt/pcre802/usr
libdir=/opt/alt/pcre802/usr/lib64
includedir=/opt/alt/pcre802/usr/include

Name: libpcre
Description: PCRE - Perl compatible regular expressions C library
Version: 8.02
Libs: -L${libdir} -lpcre
Cflags: -I${includedir} 
usr/lib64/pkgconfig/libpcreposix.pc000064400000000537150403561430013301 0ustar00# Package Information for pkg-config

prefix=/opt/alt/pcre802/usr
exec_prefix=/opt/alt/pcre802/usr
libdir=/opt/alt/pcre802/usr/lib64
includedir=/opt/alt/pcre802/usr/include

Name: libpcreposix
Description: PCREPosix - Posix compatible interface to libpcre
Version: 8.02
Libs: -L${libdir} -lpcreposix
Cflags: -I${includedir} 
Requires.private: libpcre
usr/lib64/pkgconfig/libpcrecpp.pc000064400000000465150403561430012721 0ustar00# Package Information for pkg-config

prefix=/opt/alt/pcre802/usr
exec_prefix=/opt/alt/pcre802/usr
libdir=/opt/alt/pcre802/usr/lib64
includedir=/opt/alt/pcre802/usr/include

Name: libpcrecpp
Description: PCRECPP - C++ wrapper for PCRE
Version: 8.02
Libs: -L${libdir} -lpcre -lpcrecpp
Cflags: -I${includedir} 
usr/lib64/libpcre.a000064400001211100150403561430010054 0ustar00!<arch>
/               1575493209  0     0     0       970       `
.���+x�d��T�T�T�T�T�T�T�T�T������-HA�A�U(fuX���������������������ییی�h����t_pcre_find_bracketpcre_compile2pcre_compilepcre_configpcre_dfa_execpcre_execpcre_fullinfopcre_get_stringnumberpcre_get_stringtable_entriespcre_copy_substringpcre_copy_named_substringpcre_get_substring_listpcre_free_substring_listpcre_get_substringpcre_get_named_substringpcre_free_substringpcre_calloutpcre_stack_freepcre_stack_mallocpcre_freepcre_mallocpcre_infopcre_maketables_pcre_is_newline_pcre_was_newline_pcre_ord2utf8pcre_refcountpcre_study_pcre_utt_size_pcre_utt_pcre_utt_names_pcre_ucp_gentype_pcre_utf8_table4_pcre_utf8_table3_pcre_utf8_table2_pcre_utf8_table1_size_pcre_utf8_table1_pcre_OP_lengths_pcre_try_flipped_pcre_ucd_stage2_pcre_ucd_stage1_pcre_ucd_records_pcre_valid_utf8pcre_version_pcre_xclass_pcre_default_tables//                                              78        `
pcre_maketables.o/
pcre_try_flipped.o/
pcre_valid_utf8.o/
pcre_chartables.o/

pcre_compile.o/ 1575493209  1667  135   100644  70976     `
ELF>�
@@
L�AVAUA��ATU��SI�YA�AA����=�����L���?E�
L�C�[C#�M��I���E�c�E1���E��tqf�I��E�J���A��?A��D	�D9�u�N�#J�\#��tH�HЃ�JwFLc�L�G�ZE��uD��H�1L�5A��Oc�M�>A��fDI����u��[]L�A\A]A^�fD[D��]L�A\A]A^�E��uOD�L�C�L��M��t&@I��E�X���LH�L��C�u���1��	�n9��fD�S�A��7��A��0��PЀ��{����SB�D�ЍJЀ��XL�K�D��E���C���=��8����3�-����<{�cL�Hc�A���I�q��`����@�7�0N�A�I)�A�
��I������`�=�D�7)����D�A��<��A��'��E1�L�A��{�E1�A��-uE�AA�I��A��H��A��
1��	fDM����M�AD�AA�HA�C�Hc�A�*u߅�xlE���|I����}�l�9�������u-�I�������%�����@��������H��P�I�ك�Bƒ�@�����=M�����H��E1�1�A�0L��7�Lc�C�tH����uQ��0uL�1�Lc�C�u�}t1��o�����xA��I��҃��A9��S����"I���E����A������`~
�D�7)��Ѓ�A��AL�)��o���D�CH�KA��}�E���A���H�������<}��<-t�A�u޸������L�K�1����M�ȅ�tmE����9����M�����Ⱥ0��A�7M�)��x����� �7�{����=��M���[���E��tI��1�A��}�#���M���:1�M���3���I��A�������M��)�����A��1��D)غ��M��)‰����I����I�����I��A�L��E�����������H��L�
�8@��StEwS@����@����@������t�xD�A1�A��t�>H���8@��Su�A�<9H��fD@��YrV@��[w<��tL�D�@fA��E��L��8@��Tt�A�<9H��c���f�D�G�A��v��D��u����AWA��I��L�=AVA�����AUA��ATE1�UH�-SH�_H���Gf.�<hw8��HcT�H�>��fD�Cf����H��<Tt�A�H��<hv�H�������[]A\A]A^A_��Cf����H�\��C!�H���wD�H���3uÁ�t/�C"�S$f��f����f9�u��C&A�H��&�W����H��!A���C��������H��[]A\A]A^A_�@H��H�XA���@����@A���t	E9��=���<T���CE��H��E1����fD<_H�{L��D��HE�D��L�D$D�$������x�L�D$D�$A�H��@�Bf����H€:Tt�H�Z�B����E���3�{f����Ix(H��D�Bf����H€:Tt�H9�v	H9��{���H��L���D��D�\$L�$���������A�H���L�$D�\$����f��CH�Sf����A��C��<HB��CH������@�CH�Sf����A�A��t]�C<�vU��?H�5�H�����@A��H�SA��t�C<�w��CH���k���H�C�:�����CH���L���D���	�����������f.�AWAVI�ιAUA��1�ATL�%UH�-SH��1�H����DH����I��H9���E�D��A��X��A�P�����A��r��A��R�TA�P�����E�WfA��fE��tHA��`��E��M�E�A��T��M��Mc��1�1�B�|L��K���I��H9��i���H���[]A\A]A^A_�A�Gf����I�E�A��Tt��D��<Qw���Ic�L�>��@Mc�B�DI�@A�Gf����I�E�A��Tt��]���f�M��L��L��D��H��L�$���L�$��t A�Bf����I�E�A��Tt�����f�A�Bf����I�A�:Tt�H��1�[]A\A]A^A_�f.�E�WfA��E��MV(fA�z��L��D��H��L��D�$L�T$���D�$�������L�T$A�Bf����I�A�:Tt��A�Gf����I�L���0�N����d����H���3�Y������G���f�x�B����7���f�E���2���A�G<��%�����?H�5�I�����@A�GI�W��<LB���f�I�G!�i����E������A�G<�������A�GI�W��<LB����f�AWI��AVA��AUA!�A��ATA��UH�-SH��H����_@��X������]���Jǃ�����B���B���v�ux��us�A�Gf����I�A�<T���|1ɺH��L��^�����у�^tL��_�{����H���f����f�� D��C�D	�H��H���"�����u�H��1�[]A\A]A^A_�DD��D���Ӏx
u�E���a������H���[]A\A]A^A_�@AWAVI��AUA��ATA!�A��U��SH�H����s�������`����^�}��_����Xto��]tj�Bǃ�����B������@A�Ff����I�A�<T�U�<L���Ѐ�St	v���d<w�Hc��H������D���.�����u�H��[]A\A]A^A_�H���f��O����f����f�� C�D	�������f����@�u	E���Q���H��1�[]A\A]A^A_��GL�<Su�I�A���d��v͉�D��L��������_���f�A�Gf����I�A�?Tt�I���Ѐ�Stw+�������H����Ѐ�Su�Hc��H������d<�g���Hc��H��f�H���[]A\A]A^A_�f�f.�AVI��AUA��ATI��UH�-S�������I�~L�������Q�Dwb��HcT�H�>��@H��E��tG�@���tN9�u:A�Ff����I�A�>Tt��(1Ҁ�XH�����p�����x���uɉ����������[]A\A]A^ÉÀ�A�$D�말AWI��AVAUE1�ATM��UD��SH��8E�H�T$�L$H�H�4$dH�%(H�D$(1�H�L$ ���(�b����H�D$ E��D�T$H�H�D$�\���[����#u
@������(�o��)��H�D$ ��|uE����H�HH�L$ �P���+��\u�H�AH�D$ �Q��t ��Qu��H��H��H�D$ ���u5I�A�����H�\$(dH3%(D���H��8[]A\A]A^A_�fD��\u�H�AH�D$ �yEu��g����H�t$ 1Ґ�NH��\ti������^��H�FH�D$ �N��\tA��]�H�T$ H�BH�D$ �J��]���������\�H����@��xEH�pt�H���������u�H�pH�t$ �_�����L$H�T$M��A��H�4$H�|$��������H�D$ �8�}�������A�$�|$A9�A�<$DL��`����H�D$ H�P�DH����
�;���H�T$ �
H�Є�u��r����H�<$H�P�Gs����������A<?�t<*tHE�BE�D;D$uH�|$�2���H�AE1�H�D$ �QH���\���fDH������H������QH�AH�D$ ��(�W�Q��Pu
H�AH�D$ �Q��<��H��E1�'����E�BE�$D;D$uH�|$�����H�p�>H�t$ �8��<D��P9���H��f�H��H�D$ H���9�u�H�|$tHcT$H)�H9��9H�L$ E1�����A������5���H�BH�D$ �J������H�€�Q������H��H��H�D$ ��������\u�H�AH�D$ �yEu�����Q��|����H�AA�H�D$ �QH�����E��t
E94$}E�4$I�A���������y?�QtF��t%��)t H���f���)tH�L$ �H��H����u������H�HE1�H�L$ �P���H��E1��~����H��=t	��!�j���H��E1��i���A������H�|$D�D$D�T$�D�D$�������H�L$ E1�D�T$��#���H���p�����AWI��AVA��AUA��ATUSH��H��(dH�%(H�D$1�H�G0L�d$H�l$�D$H�D$�f�H�T$H�JH�L$�:tM��E��D��L��H��H���F�����~�H�t$dH34%(uH��([]A\A]A^A_��@f.�AWE��AVAUE��ATA��US��H��8H�L$L�t$pL�D$A��|$dH�%(H�D$(1�L��A���I�vI����������#t��f�H9���H��H�xH�|$�H����A���I�V8��u�Mc��L)�L��H9�w�A8��u�A��t
A���8Hu�Hc�I�vH�H�D$����p���H�PH�T$�HH���N���fDA��H���H�|$���Y���A�����I�vH������@��\�I�V�<
��L�@L�D$�(��tY���~OH�
��?H�5�<�#,��w����t)H��1�H�D$�P���H����?��	�9�u�M�D0A�E����I�NM��������<#t�f.�H9��I��I�xH�|$A�@���.A���I�V8��u�Ic��H)�H��H9�w�A8��u���tA���A8@u�L�7I�NL�D$A����p���I�PH�T$A�@I���M���D<*td<?t`�H�=L�������tF�D$��������w2H�Hc�H�>��I�VHc��*uf.�1�H�\$(dH3%(��H��8[]A\A]A^A_�A��L���H�|$������Ic������@I�NI�����A�V`H�t$$E1�D��H�|$�D$$�c��ŋD$$���u���H�D$L�@L�D$A�E�����"������L���H�Hc�H�>��D9��/���A���#�����������I�FHc��,(1�A9������A���0���'H�\$D�#D��A���~DH���?�4H��vD#$��A��tH��1�H�����H����?��A	�9�u�D9����������I�FHc��,(1�A9����f���A��~��t�H�\$D�#D��A���~�H���?�4H��vD#$��A��t�H��1�H�����H����?��A	�9�u�덁���#���
���1��|$�������������I�FHc��(����������������I�VHc��*�����������q�M�� �b�|����P����D1��|$���@�������2���I�FHc��(�Ѓ������������I�FHc��(��������������I�VHc��*��������A��~^��tYH�\$D�#D��A���~DH���?�4H��vD#$��A��tH��1�H�����H����?��A	�9�u��؃����\���H�Hc�H�>��E���������	����4��������&�����H�����������A��/ �6�IA��_ �#A��0�1�����������U
���������1����������1���������E���������	�������E���������	����r����M1����d�����H����T���1�������G���A��( A���G1�������)�����/ t~#��_ t��0�����1��|$������� ��
�������	������Ձ�( �����1��|$������D9���������*���I�FHc��(D9����������H����������H��BH�l�����H���H��������H�H��BH�l��!���I�FHc��(D9����������A���t�����A��
A�������1����������A������I�FMc�B� �Ѓ����A�������I�VMc�B�"���������A�������I�FMc�B� �������n���A���b���I�VMc�B�"�M����F����A���9���I�FMc�B� ����������A������I�VMc�B�"��������A��t$����A�� t~#A���t
A������1���������A��	������A�� A��
�������@f.�AWH��AVAUATUSH��XH�H�T$H�|$�PL�pH�L$dH�<%(H�|$H1�������{���T$ �D$!H�D$�-1�L�%L�0H�D$ H�$�6�+H�<$�Hc�I��H�@M�<DH�A�7H����tJ`D��9�|�H�D$�/H�D$L�0�����H�L$HdH3%(��H��X[]A\A]A^A_�fDA�GH�|$�A�G��DA�]�f.��P��^tT1����}t>�T H��H��tA�V��Hc�I����u�H�D$�.H�D$L�0������\����D4 �����L�p�P��@f.�AW��AVI���AUI��ATA��UA���SH�H�|$�fD��…�tT��R����PtQ�H�H����wSH��H�������?t�O�Q�H�O��HB�H���…�u�[]A\A]A^A_��Gf����H��H�E��u���+�w���H���H��L���f����G�<��Z�����?H�
�H��D���@�O�Q�H�O��HB��z����M�M�]M9�sBA�L�L��f������L�I9�u�UD�f������L�I9�t;H��L9�r��Gf������I�L9\$�w
��o�GH��������"�JI9w�M�]빐f.���SA�L�
I���H�DD�D��E��tcA��PtfA��\txA��_��A�H�Mc�G�����L��H�����?t�GH�O��<HB�L�D�D��E��u�1�[�D�Gf����H��u���D�����H��Y�����Gf����9�tq�H��:���f�L�<+�-������%���M�Љ�I��M�������G�<�������?�H������GH�O��<HB��B���f.�H��[Ðf.�AWAVAUATUSH���H�H��$0D��$��|$L��$HH��$H��$8�t$pL�	H��$H��$PH��$�H��$�L�D$xH��$�dH�%(H��$�1�A�CpH��$�H��$x��$�H��$@H��$p��$(����$@DŽ$0�;_uB�CE1�fD��$jf�$f����f��$hI�C@��$0H��$`H��$`I�C@��$(E1�H��$�HDŽ$�H��$��P�fD�cA�s`Hc�H��$�L�4H��$@��$�HE‰�$�M��H�D$(�������$���$����$ ��t��$�A�C`�D$��;D$pt��$@I��A�D$�A�D$���$���t+I�D$A�\A�D$I����$@H��$�fE�T$��D$L��$XM��DŽ$D��L�d$@��L�d$P�D$[�\$[�|$t�lj�$�1�%����E1�HDŽ$���E1퉜$�L�Ӊ�$���M����D$8H�D$ %�D$\�����DŽ$��D$0�D$H�$�D$�X�A��\��L��$XA�yE��D�l$I���D$8H��L��I��I��L��D�l$I��L��$XH��I�B E�!H�H�|$(M��D���I9�� H�D$@H�|$(I9NjLB�L��H)Ƹ�)�H�H9���H�D$(�H����
L�|$@H;\$P�Y
E�����D$8�)���A�D$փ��
A��?�
A��{�o	H�|$ �TA�E��D$A����D$�D$���D$`��I�BE1�B�0�;
A��#�lA���D$��D �t;H��$X�S���L�|$ A��fA�I��H��I+r0H��A�w�1�I+R0A�W�fA�w���|�XH�=��Hc�H�>��DL��$XH����.�D$0�D$hA�����E1�D$�D$H�$H��$PA�A<+�:
D��$��D$`<?uI��D��$�L��$X�;�G�<�T@����@�����l$`D��DŽ$���u	E���H+��@���3�K�s�,$E����	D�$�E����A����"D$[A����yA���A��%fA��L�{D�fD�c���2E�7I�_���t
�H��@�s��L$`����	H��$P���A����'����4��I��I)�E���[	�0�V� �|'H�
��Hc�H�>���I�BHH��$�H��$XH�PH��$X�P��*�S��?��D$���|$M��E1ɸ^1�E1۽^L��$HA�H�|$(H��$D�L��$PE�bxHD�DŽ$D�t$H��$XH��$PH��PAR��H��$�L�T$xPAPH��$tPSASL��$�����L��$�H��@���\��`L�T$`uH�|$(�aA�9)��=H�D$(H���Q ���$D����)�9��
H�\$(�A��,$A�^U��D$HI�_fE�GL��$XM��$f��$�,$�D$H�`����A�I�_�D$H�,$E1�L��$X�$�ѐE����H��$XE1��PL�HH�Hc��0t,f�C�L�I��D�lJ�A�Hc��0u�A��������}��A�QI�I��}��!Hc�E1�0t-�C�4�H��D�dr��1H��0u�A�����"+H�D$xE9��d$�I�ɉD$h�TfD�D$t�L$��������E��D$I�_A���f�A�Bs�;�]��$��D$���t	A���$�I�_�S����$���
�(��
��|$�L��$X�$�����C��l$\Ajx	ŋD$�D$0�D���@A�R`�L$E1�L�T$hL�t$xL��$XL��L���H��E�6L�T$hA��E����$E���A�����|$[��H��$�D��L�T$0�L�T$0�T$\I�_��������	ʹ�D$tD�l$fDD�l$�A��������1�H��$���<H��H��@�{�9����$���
����A�Jt �|$�L��$X��������L$\�L$����	�k�Ajx�D$0��������DL��$XA�q@��=weH�@$H��sUA�II�Q��u$�FfD��]t;�B@8��^H����t&����\u��B<]u��BH��H����u��E1�H��@E���_	��^�V	A�I��I�qH��$XA�i��\u�A�yEI�qtܹH����€����sKI�q�fDL��$XH����'�D$0A�E1��D$h�D$�D$H�$����f��\$������,$���E�D$I�_�ljD$�����|$0A�L��$X���DL��$XH���w'A�����A��D$h���fDH�D$(M��L��$XH��t%���L��H+L$@)�H�H9��H�D$(ʉA�Cp��$��ƒ�;�$�A�C`DT$p9�MÉT$pH��$���$���A�9|�m'H��$���$@Hc�$(H�L�dI�����@D�l$A������D��$���"T$[����I�_�D$���A�D�l$��fD�D$�D$0�,$�h����H��$XH��V��t����~H�VH���8tf�H���
H��u�@��}tR@��,�A����r@��}t>�0�-����zH���8tH��H���
H��u�@��}�����fD�D$D�l$���D$`uVE1��}���fDL��H�|$PH��L�T$H)��H�D$PL�T$H)�I)�H��L�|$@�r���L�|$PL�|$@�`����I�BA�B�0����L��$X��fDI9BH�-���H�D$x�4H��$�L�1�H��$�dH3%(�RIH���[]A\A]A^A_�DH�|$ t2H�|$(u*H�|$ H��$XI+B0�_��f�\$f��)�f��f�GH�D$ �D$@����H��$X�S���E1�L�|$ fA�?I��H��I+R0H��A�W�I+B0A�G�fE�G����f�@��o�tB�,$L���D$hA	BxE1��D$HL��$X�$�!����H��$X�A��<���H��$�H��fDH��H��H��$X�:�B)�@�>�z��@���t؋t$\��������	�����f.�A��-�DH�|$(�$H�D$ ���fD������D$0�ʼnD$�l���@<]�����@��:�
�H�\$xE‰� ����l$tE�w���t	E����A��~�D$\ABxD	�$�L$`�����D$`DŽ$�E�����D��$�H��$�H��D��ARH��$hD�L$��$�L�@L�T$p��������$�_AXL�T$`D��$����D$`�,$E1�DŽ$����������<��D�$�E���'�DA���^��$�L�{��'�D��f��A��f�C��"D$[E���6E9�����E!D�sH�S���t
�
H��@�r�E)�A���� A��%fA��L�zD�fD�b�����@A��L��$�Ic�t1��ƒ�A�<A�<D9�r�L���f�I���D$`E1�L��$X���@H�t$ H��$XH�D$ I+B0�~��f��)�f��f�F��DA���$�I�_��C�����fDH��$XM���L���@H9��H��H�xH��$X�H����
���H�U8��u�Lc��L)�L��H9�w�8��u�A��t���8Hu�I��L�L7�L��$XA�9���A��1���DD��$�L���H��$X���a���I��Hc���@H��$��8T���$�����L��H+�$�A�9|M�g�?.f��A�TfA�G�$L��$x��$��D$L��$���$����t$`D�s��u	E���"������,$DŽ$�
���X�G�<�%�,$E���I���E���`A����VA����,A��L1�E�fA�WD����A�WI�_�,$A�G�h�@�,$�D$0�����0�H�D$x������]��A�Bs�<H��$�f�H��$�I�GH�D$H)�$�)�$��D$L�\$HE1��D$h�D$`A�������L��$�L��E�ωD$0D��$M��D��~ �|$[tH��$X�.�����H�D$(H��t
H�|$HH)�H���D$8����H��$XI���P��\�x��E��D$8L�hI�uH��$XA�m������]�i����D$8���]���E��M��I��L��$�D��$A����D�D$`E�����D$t���XE���OA���u�\$������,$A�E�gL��$X����D$8E�I�_�D$�D$0�j�f.���[�G��\�nA�V`�L$L��$XA�H�t$xL���[����H�D$x��������������X���������F��������H�|$(A�W�]�E��������@+����3����m&�����9�E����H�L$xH��$LH��$HL���<����<1҃���D$8��;�$H�C���D$`H�����S���$L�C�L��$X����H��$�H��H�� ��0@1H��H��H9�u퀤$��DL��$XA���D$8���H��$XI���D$@��>�(�P��\��xE�uL�h��I��A�}E�aL��$XA�EM��<\t݃�
����
���D$8<-���t$t��t#�������~�D$���H��$X��U��IՉ���Lc���Hc�@���T$0��t"I�VF�$"D��D����Hc���@��A��I�����A�Nt A�A�D$8<-�S���fDI�AH��$XA�I��\u>A�I��E�I�Q�fDH���J���E��H��$X�
H�Ѐ�\t܄�t	��]���D$8�xf.���
�_�����
�V����D$8����H��$X�pI��@��=��H�@$H��� ����PH�xI�����
�����\t)��]�����A�M@8�t-��I���������\u�A�U��]u�A�UI����D��]u�I�NH��$@��:��:H��$X�x^H��$�u$H���D$8H��$�H��$X�D$hE��1�D+�$�H��$(L��Mc�D��D��$4L�M��L��$8I��L��$ �DI��M�d,C�,3@���?(@��9�u�H��$�L��L���L���u�E��H��$(D��$4L��$ L��$8A���D$�'-C�H�
H��$Hc�Hc��o,)�$��o|�B��H�Hc�)�$�Hc�)�$������J,��H��$���1�)�����1��u��$��D$8���.H��$�H��H��H�� �H����H��H9�u�I���D$8A�
���D�L�k��L����L$0Hc�L����L��$X�D$`�E����H��$X��pI��@��\�����xE�������
t��
uA�Nt D�\$8E��������S���fD��\�����xE�P���D�\$8E��������'���f.�H��$XI���P����@H�
��?H�=��@Hc�I���#,���t$H�V1�f����H����?��	�9�u�L�H��$X���f�H��$�H��$�)�$�)�$�����:H�D$xI�����f�������,$DŽ$�����fDI��A��1��z�f�A����fA������|$��i	A����_	A����M	�D$����A�E���hL��$HH�L$xL��L�T$0H��$LL���C��L�T$0���>1�A���L����;�$HA�G��I����A�W���$LA�G��D$L��$X�D$0�$�D$H��f.�������,$��������k�fDD��$��:��p��N�r��B�Hc�H�H��$P��fDH�
�HЋT$tH��$P������P���������?H�
�H�H��$P����G�<��L��H)�H��$�A��@��`��A�����!E����
I�J(M�BHHc�$�L��$�A����#A�L��$�H�߾��$�D��$��M��L�CH��H��L���A�^H�KD��$�I��L��$�fD�KL��$�A�CiDŽ$��CI�_A���,$E����
Hc�$I��A��VI)�H�|$(A�EE�]���f��M����I)�A�;]�n�M��D��$�M���%f.�A�D$f����I�A�<$T��0L��D��L��L���4����t�M��A���@I�r�P����H�XH��H�\$`�H��$X�I��H�@�u�:�-2��)��E��D+t$`1۸L�%L�E��L��L�|$hL��$��@H��O�d4H����A��Lc�A9�u�H�|$`L��L���L���u�Hc�L�|$hL��$�A�L���p�i'A�1�I��L��$X�h�L�HL��$X�@�PÀ���,<<�4I�R�>M�iL��$XA�yH���:��.I��L��$XA�	M��I��H��
u�D��D)�\$hH�|$(��,9��\2A�BX='�52�\$h�SA;R\|�SA�R\�� �I3H��$X��A�BXf.�A�B`�|$M��E1ɻE1۽_��L��$HA�B`��A�GA�B`A�G�_���DA�����L��A�����A�D$�A��%A��I����E�_�A�G�E�g�A����"D$[���A��L�{D�����H�H�
��H��������H��AH��|��9�t�H�kH���Hc�H�D$0L��$X�D$`�	�@L�HL��$X�H�Q߀�[�<H�5��Hc�H�>����{�&�fD���0D�sH�{���t�KH�{@�sA��L�D�����$���x$�\$9�tAD��$�E���L	DŽ$�����D�l$E��xD�$$E��y�D$�$��$��D$��$��$��$���1�	ک���������D‰$���
�A��t$L��1�H��$�L�\$�.���L�\$����0����)H��$�f��f����@H�D$x���A��I�_���b��
�|$�A�L��$X�b�D$�,$�D$0�D$H�$��A��#L�{D�����Q�$H�PH��$X�H��\���xEH�p�(	�D$tA�\���l	�D$8A9��O-���A��
��A��
��D�D$tE���-A�����A����D$�H��L�A��E��H�
D��E9�riD��D��H�=����A�����G�T�D�A9�t4�f��Љ����A����H�=�G�|��9�����A9�s�A��H���L�k��L��D�\$`�D�\$`Hc�I�D��L����D$`Hc�L�L��$X���fDL��1��B��f����H€:Uu�A�h������+���-A�9)�H��$P�|$���$L��$H���������5�D$M��DŽ$��D$0�w���D$����A�����	A�����H��$X�B<<@��<'��@���1�<{���D$A��I��E�o�L��$X�D$0�$�D$H�i��H��$P��\�4���M��X���D$�,$DŽ$��D$0�����H�D$xE���D$hD�t$hL��$XE���y���H���gE��$DD$H�$�D$DD$0E9�D$�DD$h�D$h���fDA��I�w�A�����fDH�����<�t�M��I)�Ic�A�΀H��$��L��$�D��$��D�L$`�;D��$�L��$�E�����DŽ$��r������.�D$8L��$X�Z�D��$��,$�D$`�"�DH��1�D���D��$�ARH��$hD�L$��$�L�@L��$����ZY��D��$��,$DŽ$�
��DE\$`L��$����D$`��@A����A�����A��L�D��E���fA�w��A��!A��L�{��D�"D$[����DH�D$xI��A�������D$h�����H��$X�X�pI��@��\�P����DA����DŽ$H�|$(H���9!A�E�Hc�$D��H�H����H=����X'���)�9��G'H�D$(ы,$�E���;H��$�E)�L��1�E���{���H�|$(H���5E���,��$D����H�A��Mc�I�ă�H=�����&�7��)�9���&H�D$(�H�߉H��H������H��H��H�_H)ȃ�Hc�A��H)�A���Vf��f����f���Uf�Gf�F�z��fD�+����*����)�����(�����E�����D����C����B����8����7�x���6�h���5�X��A��LfA��1�1�E�fE�oA�������@���\$�$A���E)�\$0D��L$H�ԋ\$�����A�WI�w�A�G�����A�QEˉL$D����A�� M�A	BlE;rh~E�rhI�B@L��$XH��u�f.�H�H����PA9�u�L��I��f�x
�����I����f�DŽ$H����DŽ$L����A�9)���|$�H��$PM�������������,$DŽ$��D$0�D$�J��f.���$�DŽ$��������f�A��FI�_E��4���I�J(M�BH�L��$���$�L�$H��H��$P�<��H��$PIc�H�~�A�D$L�$H��$P�]��f���C�Uf�S�H��$P�bH��$P�B�K���$�H��$pM��f�H�{I9�wL���L��芵�����k"H�H��u�H�D$x�(�L��$X��H��$X�H��\����~Q��H�VH��$X�N��\����H���~�Et��D$tA�\��������D$81�A�\L��$�A��\�z���@���q���L�l$xA�V`H��$XA��L$L��贫��E�MA��E��L��$��4������.�������;����������"L��$X�D$8���@�\$��������EÉD$�D$h���D$`�|A�I�[A�PE����E�wH��L)�f��fA�G�D$L��$X�D$8�,$�D$0����@�C���f��H���C�UD�C�f�FH)��������f�D;t$h����NA�E����H�|$(��I�_!���@E�����A�����|$[��H��$�D��L�T$0��D$8L�T$0�O��D��$H��1�1�ARH��$hD��D�L$��$�L�@L��$�����AZA[��D��$�,$��DE\$`L��$����D$`�5�L��$XA�}\��I�uH��$XA�m���H��D$8����L��$XI�BHA�'M�HH��$��>L��$XA�x<DD�A�@�PՁ���M�`��tM��A9��

I�JL��1��@H�����t"��D9�����D��u�A9���H�D$x�,$�9H��L��I�NjD$�D$0����E��H��$�A��Ic�t1�A�Ѓ�F�$G�$D9�r�L���fDH��$X�R�pI��@��\����DA��#L�zD��>���D��H�t$HI�$L�T$8��L�\$0A�GH)�H���L�\$0L�T$8fo�$�fo�$�I�[!AOAW���f�Aǃ���E��L��$�A��Ic�t�$1҉׃�A�9A�?D9�r��$I�����L��$XA�y\���D$8����D��$��D$8����D$H�,$�$������D$�D$0�,��@H�D$x�	����L��$X���H�D$xI�������D����A�9)��D$H�,$M��H��$P�$���fo�$�fo�$�A_Ag�*����M�wL����$0H+�$�A�Uf��fA�G��tI�C@f�x
��H�I�C@�D$��;D$pt A�9)u�D$p��$@I��A�F�A�F���$���$�A�C`H��$�L�0H��$�L�H��$�H��$��$��H��$�H�������$@��)�9���H�D$x�1������{h�L�H�D$xL��$X�7�[��fD��I�F�ك������H�5Hc�H�>���D�T$tD��E���.�D$8�A����b�����?H�5L���RHc�I���E#�A���t H�pE1�f����H����?��A	�D9�u�L�H��$X����f�A��A����rH��$�L��$���$L��Hc�D�xD�@H�4$A�siE��E��@��$�E��@��$�L�eH�E���9L�[�C^H����L��H)��ԈSH��H�$�C�H��$�H��D��$D��$�L��$��L��$�M9�D��$�D��$L��vDH�UHA�EI��f����D�E��tX���"A�U�H�EHD҈PH�EHH�PH�UHM9�w�A��H$M��A�������I��L��$�H�ߋ�$���@���"A�U�H�EHDˆPH�EHH�PH�UHM9��g���H�<$I��L��$���$H���f�H��H�$H��$�D��$H��D��$�H��$��M9�H��$�D��$�D��$����1�1�����I��H�D$x�<���DA�pH�I�PH��0t�H���2H��0u�D9�������\$M�HL��$XA�H�\$0�$�\$H�zDA�E���IF���H��$�ʉ����f��D$8��<����D$��$H�D$0��y�,$DŽ$�����A�)H���+�C
��-��
A�Q���I�yH��$XA�QI��H���������1�f�I���4�L��$XA�Q��\r�A�1�0u�D9�����-��
��+�=
H�|$(M�Z(u!A�����
M���fA�{���A�]RL��I+B(A�UH��L��DŽ$D	A�GI�G	M+Z(�|$�E�_fE�OL��$XA�GI���&���D$�������E1�H���|$M��E1�H��$X1۸^�^L��$H�v���x?�7�@<=t<!t<<�#�|$M��E1ɸ`1�E1۽`L��$H�2��H���|$M��E1�H��$X1۸]E1۽]L��$H���H�PH��$X�x)���|$E1ɸY1�E1۽YE1�L��$H����H�PH��$X�P��)t"��tH�����)tH��$X�H����u���E��H�D$xL��$X��U��A�SH��$XH�L�JL��$X�rHc�����H��1����H��$XI��H���LN��r�Lc�B�u�@��)�N�����
H�D$x�&����I�JA�)A�M�iL��$XA�A���I�A�H��$X�I��H��H�����u�L��L)�H�|$(�D$h�Z���zD9����� �(1�E����M�Z(�@���I�R�'�s��H��!����=��H���|$A�1�H��$XE1۸Z�ZE1�L��$H����A��B���L�HA�)L��$X�HH�����H���|$E1�1�H��$XE1۸X�XE1�L��$H���H��$�L��L�$M�w	L�\$H)�H�{H���I�G�]L�$H)�L�\$��$@f��f�CA�GUfA�GI�C@���D�l$t��$�<��$� E���+���H�k�( H��T$8��) Hc�H�H���@DŽ$��D$�D$0M��������e��DL��$HH��$LDŽ$LDŽ$HL���A�<)�\<:�TI�QH��$X�B���-<KwSH���Hc�H�>��I��H��빃	fDI��뫃	�	��	��	@��	�߁	A�Jt��H�D$x����������D$L��$XM��A�H�D$0�$�D$HH�������D��f.�D�JD�WE9���D��D�����A����H��B�T�D�A9�t>�DD��E���A��A���D�L�A�CD�\�E�A9�uE��A��A�RE9�s�9��?�E�D9���
D9��3H�VD9���H��D��H�T$`D��$��H�T$`D��$�H�
Hc�L�H���@D��L��$XE��)�H�|$(E�|����D�D$0Hc�A�{�fD��E��A��IŃ�A��H�D��E��t#I�FA���H�ȃ�H��A���D����H��9�u�E���"��DA�Nt �#�fD�|$0������<��A�}E�p�I���D$8����@DŽ$�,$E�����H��$�1�L������A��JI�_E������H��D�Bf����H€:Uu�D��)Љ�$E����A�I�J(M�BHH�ߋ�$�L��$��D��$L��$�I��褻��H�CHc�$�H��H��H��$��L��$�D��$A�Ci1ɈL���m��DD9��.��C9�����D����@A�yE�h�I���2���@H��$�M��D�Qf��f�Af����H)���f��u�D$��$��$��$����D$t��$���$���$����d���H�k��H��T$8��H�H��EH�]H���� H�H��H�XH����
 H�H�H����/ H�H��H�kH����_ H�H��EH�]H����0H�H��H�XH����T$8L��$X�D$8H��D$`H�A���g��H�D$xL��$���O��A��HI�_E����A��H��I��L��$���$�D��$薹��H�CH��H��H��H��$��E��D��$L��$���r�,$L�����Hcщ�����X�������
�D$DŽ$��D$0��������H�D$xL��$X�@���H��$�I�OH�p ��H��H���҈Q�H9�u��	�A����A����-A�B`)؉Ã����
H�|$(���A�I�z(��$���L�T$h�L�T$hI���������H��D��$D��$�H�T$`�H�T$`D��$H�D��$�H����L��I���)���H�D$x����A���6��,$L�����L�JI�JL��$X�B<<� �\$<'A�}�'DD؉\$0�$�\$H����DŽ$�D��	ŋD$�D$0�[���A�GdH��$X�B<R�PL�Z<<�t<'�g
L�r�'�����L��$X�BI�r��L��$XA�SH����
I�VE1�H�=A������)C�L�D�lH�H��$X�I��H�R���2E��x�Hc�u�E����I�QH��$XA�q�0��I�����A�OH��$X1�I+B0L�|$ I��H���D$H��A�G�H��$XI+B0��A�G�1�fA�G�L��$X�l�������AZ`��DŽ$����H��$�H�H��H�~ �0H����@1H��H9�u�L��$XA���D$8�D$h�x��H��$�H��@H��H�~ ��0@1H��H��H9�u��p��H��$�H�H��H�� �0@1H��H��H9�u��@��H��$�H��@H��H�~ D�0H����@1H��H9�u��K���H��$�H��H�~ ��0H����@1H��H9�u뀌$��������5H��$�H�H��L�B @�8@>H��H��I9�u����A9��5��-�U
��+�L
H�|$(M�Z(�W�A�1��7�D$A�>�D$0�$�D$H�j�H��$��o��)�$��o���@)�$�)�$�H��$H��$�H��L�B H�@�H���� H��I9�u������z&��
L�rI��1�������A�X�y��I�B@A�B|H������A�q�P	I��A�W��PA�W�H�H��u��]��H�D$xI���?�?��f�E��E)�H��$X��~I�AH��$XA�9���L�HL��$X�8)��H�|$(��A�`�|$M��E1�L��$PE�bx�E1�DŽ$D�`H��$DL��$H��H��$�H��H�q ��
H��H��H9�u�����������H��$������H�5�@��H������
����u�D�|$tE���b���H�k�H��T$8��Hc�H�H�����H�H��H�XH����
H�H�H����H�H��H�XH�����H�H�H���� H�H��H�XH����. H�H�H����0 H�H��H�XH����^ H�H�H����` H�H��H�XH�����/H�H�H����0H�H��H�kH�������Hc�H�H���t����<$���g��$��l$���TH��$�D��$ A���$Hc�H��$�L��H�$H��$�D��$�E��D��$(H��M��E��A���H�$H��$�L��L�cH�I��I9�v8H�CH@�UH��f��D��0�U�H�CHD�PH�CHH��H�CHI9�w�L$A��L��D;�$�|�E��I��M��L��$���$D��$�D��$ D��$(H��$�����D�\$`1�L�׉�L�T$`E����1��x���L�T$`����I�R(I�GLc�I�JHH��H)�I�H��H���I�RHI+B(�BI�BH��I�R�
����L�HH�D$x��`���H���|$A�1�H��$XE1۸[�[E1�L��$H�@�����$���M��H�|$(�3�D$hA�jXI�ZP����HcL$hE1�L��$�D��$E��I��M��H��H�˃�H�H��$�fDI�vH��L�����uH��$�A�<��IcD$\A��I�D9�u�L��$�D��$M��D�\$`1ɋT$hL��L��L�T$hE��������L�T$h�����`E����M�Z(�������$H�\$xE‰����E1�<>I�JA�)A���B�A�����DŽ$�������D$�D$0��A����A�BXHc\$hI�jP�D$`H��$�����CI��E1�L��$�H�H��$ I��L��H��$��D$DŽ$�%��$�]f�H��$�A�<u]A��M`f���Q��9�t��$���4DŽ$�HcE\A��I�D;d$`��I�^L��L��H�����t�yҋT$`HcE\L��H��$�L��$�D)�I�<��Hc����$�L��$�����A�BXA�J`I�jP�D$`����~3�t$`1�I9�t�Ef����9��T��IcB\H�9��I�^��L��H��L�T$`A�A�B`H��$��A�F��D$hL�T$`��H�A�A�BX���H�������$L�|$�$H��!�<)�FI��M���^1�L��$XE1�E1ɽ^����D$h�]��M�����H��$�1������ t0H�����x�1��t��u���@�1��f.����@�1�Ћl$t�������H�k�H��T$8��' Hc�H�H����) H�H�H�k�H������M���d���A�o1�I��L��$X鏱���D$A��D$0�$�D$H�1�L��L��$�M��D��$�f�������H�D$xL��$X�鯼��H�D$xL��$X�闼���,$���A�R�q��L�r�>�����L��$X�B�����E���{A�B`��D�D)����-D…��A;Bd�f���fA�GA�`�|$M��1�E�bxE1�E1۽`L��$PL��$HDŽ$D�,���H�D$xL��$X��˻��H�D$x�黻��H�D$xL��$X�飻��@��)�`�H�D$x�'鉻���HՁ���L��$XL�r�Z1��B��H�D$x�;�O���I�ꋼ$�L��$�A�B`H��$ �H����������H�D$xL��$X�A�	���H�D$x�6���M��E1�E1����H�������H��$X��A�GfL��$XA�CM�s���A�B`�H�N���I�B(H��I9���D$1��t��A�I����A�G�L��$X��1ۉ|$%��������$�1ۉ�$�����%�D$\�ͮ���׉�A�����H�D$xL��$X�����H�D$x�0����H�D$xL��$X�1���H�D$xL��$X�*�߹��M��H�D$xM���>�ɹ��H�\$xI���鶹��H�D$x�*馹��A�BXI�ZP�D$h���QE1�Ic���$�H��$�D��M��L��$D��$��IcD$\��H�;l$h��H��$�H�sL�����u��M��A�G�fA�G���H�D$x�����H�D$xL��$X�:���H�D$xL��$X�+�޸��H�D$xL��$X�0�Ƹ��H�D$xM���鳸��A�B`H�]I��H�=���H�D$x�)鐸��H�D$(H��t	�8����A�zp1ۉ|$���I��H�D$x��[�����$�L��$M��D��$�D�L$`1�L��D��L��L�T$`E��L��$���D�D$h�Ǚ��L�T$`��~(f��A�G�fA�G����H�\$x����D�D$hL��$������A�{R��A��tmE��~ZE9jd|;fA���fE�o����H��$�1��H��$��I�GH�D$H黽���H�\$xL��$X��^���A��������޺H�5L��L�T$`�L�T$`���m���A�Gh�����H�
�A�4H���1����k�
�l.�A9���u���A�Gf�����H�D$x�:�ɶ��M��1�I�Ӄ�����|$tD������1�H���D$8���I�GH�D$H�μ���D�l$����Nj<�@9����|�f.���AWAVAUATUSH��H�$H��(H�T$H�L$L�D$dH�%(H��$1����D$\%�D$h�$H�|$xH����	H�D$I����M��H�H�D$H��t�H�|$�0
H�D$M�����H�LD�I��L��$�H��$�I��H��$�I��@H��$�� ��	L�L�
L�L�L�=fDHc�M�,A�}(�A�|*�
I�D�L��H������������H��L����������w�H��L����������t�H��L����������i�H��L������������H��H�=����������H��H�=���������H�ƹH�=�����u&���Hcځ��M�,	�A�}(���@D�$E�����%�=���=������H�D$xH�\$�D$h8A�8L)�D�Ǿn�oH�����QH��H�H@��u���������PH����f������2���f��������	�������� ��fD���0��fD��%p= ��~D=@��=P�h=0����H�A�
H��$fD��$�5����=���DŽ$�
DŽ$��$L��$L��HDŽ$�HDŽ$�HDŽ$�HDŽ$�L��$�L��$�L��$�L��$����$����L�D$hH�L$xL��H�T$pE1�H��$�HDŽ$�HDŽ$�L�|$pƄ$^L�D$0H�L$@H�T$8H\$xH��H�D$dPH��$�H�D$0PjH��$�H�D$HPH��$�PH��jj�:���D��$�H��@�D$xD)�E����HcT$\������$�����$�H�H�T0H��H�T$H�H�T$HH��H�����PH�
1�E1��ERCP��$�I9ι��$�LD�0E1ɉE��$�H����$�f�uf�E��$�L�u D��f�E��A��f�}f�UfD�E�EH�H�E(��$�H�M0L�4HDŽ$DŽ$�DŽ$�H��$�L��$�L��$�DŽ$HDŽ$�L��$�L�t$xA�^�}j�t$0��j���t$HSjjL�D$pH��$�H�T$x訡����$ f�E��$(f�E��$4H��@D��$�f�EE��t�D$d�����L$h��uH�D$x�8t�D$hH�D$pH�PH�T$p�H�D$pHcT$\L)�H9��:�D$h�$1ۅ��É$�f.�L)�H��H��A�U��D$h��uZH��$�L9��H�B��4$L��H��$��B��R���	�H�M�,I�\A�U���	��H��u��D$h5��$�����4$�����L��A������I��H��tx�$L�l$ L�4$DfA�yuEA�A�A�Q�L��L���	кH�M�T�E�2A��u�׀��E�2���]f��fA�AI�yD�����I��H��u�L�4$�D$h���B�E��h�D$d����fDH��$dH3%(H����H��([]A\A]A^A_�@���@����� ���L������H�|$��������D$h,A�,���������}�����
�P�>���DŽ$���DŽ$����H�
H�D$H�H�D$H��tD�1��#����D$h������$�]f9]���D$h�(��������$E‰D$hH���D$xD�D$hD)�H�\$H�
�E���p����!���L�T$l1�L���D$l��$�L���=��������M�D$d���u����Eu?���tH��$���H���4�р�9�D�f�Uf�M�>����������r�������'�����D$hA����H������H�D$1��c���DŽ$�
�e����D$`��xM��tH��$���H���9�u��f�Ef�M�.���H�D$xH�\$�D$hA�L)�����1�L��L���p����D$`��y���$�1�L���G���������f�M�����D$hA����H�D$xH�\$�D$hA�L)������f.���M��I��H��1����
GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864��������������������no error\ at end of pattern\c at end of patternunrecognized character follows \numbers out of order in {} quantifiernumber too big in {} quantifiermissing terminating ] for character classinvalid escape sequence in character classrange out of order in character classnothing to repeatoperand of unlimited repeat could match the empty stringinternal error: unexpected repeatunrecognized character after (? or (?-POSIX named classes are supported only within a classmissing )reference to non-existent subpatternerroffset passed as NULLunknown option bit(s) setmissing ) after commentparentheses nested too deeplyregular expression is too largefailed to get memoryunmatched parenthesesinternal error: code overflowunrecognized character after (?<lookbehind assertion is not fixed lengthmalformed number or name after (?(conditional group contains more than two branchesassertion expected after (?((?R or (?[+-]digits must be followed by )unknown POSIX class namePOSIX collating elements are not supportedthis version of PCRE is not compiled with PCRE_UTF8 supportspare errorcharacter value in \x{...} sequence is too largeinvalid condition (?(0)\C not allowed in lookbehind assertionPCRE does not support \L, \l, \N, \U, or \unumber after (?C is > 255closing ) for (?C expectedrecursive call could loop indefinitelyunrecognized character after (?Psyntax error in subpattern name (missing terminator)two named subpatterns have the same nameinvalid UTF-8 stringsupport for \P, \p, and \X has not been compiledmalformed \P or \p sequenceunknown property name after \P or \psubpattern name is too long (maximum 32 characters)too many named subpatterns (maximum 10000)repeated subpattern is too longoctal value is greater than \377 (not in UTF-8 mode)internal error: overran compiling workspaceinternal error: previously-checked referenced subpattern not foundDEFINE group contains more than one branchrepeating a DEFINE group is not allowedinconsistent NEWLINE 
options\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain numbera numbered reference must not be zero(*VERB) with an argument is not supported(*VERB) not recognizednumber is too bigsubpattern name expecteddigit expected after (?+] is an invalid data character in JavaScript compatibility modedifferent names for subpatterns of the same number are not allowed�@���������`���������� ���� ����@��������������������������� ����alphalowerupperalnumasciiblankcntrldigitgraphprintpunctspacewordxdigitpnooklmACCEPTCOMMITFFAILPRUNESKIPTHEN:;<=>?@�������������������[\]^_`������
�
��	���Q\E{0,DEFINEUTF8)CR)LF)CRLF)ANY)ANYCRLF)BSR_ANYCRLF)BSR_UNICODE)Error text not found (please report)GCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�zRx�LE�B�E �A(�C0��
(A EBBGA
(D EBBAl�`�B�O�H �E(�D0�H8�HPN
8F0A(B BBBAy
8A0A(B BBBE`�B�B�J �G(�H0�H8�IP�
8F0A(B BBBD�
8C0A(B BBBK\H,B�E�E �H(�D0�H8�G@�
8C0A(B BBBFd8F0A(B BBBx�B�B�E �E(�G0�C8�K@�
8A0A(B BBBDK
8C0A(B BBBA�8F0A(B BBB<$�B�E�E �D(�H0��
(A BBBAHd8B�E�B �E(�D0�D8�Dp

8A0A(B BBBGH��B�E�E �E(�A0�A8�G`x
8A0A(B BBBAH�r
B�E�B �E(�D0�A8�Fp
8A0A(B BBBDHH�B�E�B �B(�A0�A8�D��
8A0A(B BBBGD��B�E�L �E(�D0�J8�p
0A(B BBBA �UE��
F��XB�B�B �B(�A0�A8�G���A�B�Q�B�I�A�B�Y��
8A0A(B BBBF�E�l�B��T�f�A�M�F�j�B���F�B�B �B(�A0�A8�G� L�"\�"F�"N�"B�#N�#I�#E�#B�#Q�"��"��"D�"D�#G�#A�#B�#B�#x�"�
8A0A(B BBBELt�5Qq�������� ����	 ,P
+`�B 8R`�^r
t� �|0"��@%X�T���@���%�`8�`P��m	��#($1*>05EZp	��������*9CSZ�#Um|����P}������`�.annobin_pcre_compile.c.annobin_pcre_compile.c_end.annobin_pcre_compile.c.hot.annobin_pcre_compile.c_end.hot.annobin_pcre_compile.c.unlikely.annobin_pcre_compile.c_end.unlikelycheck_escapeescapesdigitabfirst_significant_codefind_fixedlengthcould_be_empty_branchis_anchoredis_startlinefind_firstassertedcharfind_parens_subfind_parenscheck_auto_possessiveget_ucpadjust_recurse.isra.0compile_regexposix_namesposix_name_lengthsposix_class_mapsverbnamesverbsCSWTCH.739error_texts.LC0.LC1.LC2.LC4.LC5.LC6.LC7.LC8.LC9.LC10.LC11.LC3.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..group_pcre_utf8_table4_pcre_utf8_table3_pcre_OP_lengths_GLOBAL_OFFSET_TABLE_strncmp__stack_chk_fail_pcre_is_newline_pcre_ucd_stage1_pcre_ucd_stage2_pcre_ucd_records_pcre_utt_size_pcre_utt_pcre_utt_namesstrcmp_pcre_find_bracket_pcre_ord2utf8memmove__memcpy_chkmemcpymemcmppcre_compile2_pcre_default_tablesstrlenpcre_malloc_pcre_valid_utf8pcre_freepcre_compile6B��������EC����������������������F�9��&D���������D�������� �B��������	� 	D���������B��������;D��������k
D���������DOs�!��������%F��������TG���������G��������/H���������B���������C���������"��������� OH����������QB��������_C���������B���������C��������sB���������C�����������I���������J���������K���������I���������J���������K��������G��������� L��������� M��������*!N��������6!O��������"G��������Y"D��������#B���������#D���������#B���������$DX�$D[�)$G+-��.Q��������>0!��������{2�<3R��������l8H���������?��?�@F��������%@�h@<=AQ��������BB��������BC���������CD=DD��������EDB���������DR��������<F�CF\�FF���������F\EHI��������LHJ��������lHK���������HQ���������H��JI��������KK��������KJ��������WKJ���������KQ���������KQ���������MS��������SR���������UQ���������WR��������oZ
�ZB���������ZC���������[T���������\T��������8]�^�g`��a�UbR���������bQ��������^c$
�c�*dJ��������cdJ���������dQ���������dK���������dI���������fR���������gQ���������gQ���������gQ���������gQ���������gQ��������hQ��������hQ���������hR���������iP���������iQ��������	k�Fo\�oQ���������oQ���������oQ���������oQ���������oQ���������oQ��������pQ��������pQ��������7pQ��������IpQ��������bpQ��������tpQ���������pQ��������4qT���������rF���������tU���������tR��������puT��������XvQ��������kvQ��������mzF��������e|#��������r|F���������|�$}G���������}W��������J~$��������Q~%��������X~&��������_~'��������f~(��������%)��������D*��������c+����������a�X��������V�Y��������q�W����������P��������ÄP��������7�P��������…Z��������-�,����������[�����������Q�G�������� t�  n# '$+(,04"8&<*@.D2H6L:P>TBXF\J`NdRhVlZpStbx[|j�n�r�v�z�~�w���������������������������������������������������������
 �$ (a,e0i4m8q< @$D(H,L0P4T8X<\(`	d	h�l�p�t�x�|������������	�	�	���������������������� �$�(�,�0�4�8�<�@�DHLPTX\`d h$l(4	,t0x4|8�<�@�D�H�L�P�T�X�\�`�d�h�l�p�tpxt|x�|������������������������������������
��
��
��
��
��
��
��
��
��
��
��	��	��	�	�	�
�	



 
$(,D0H4!8%<T@XD�HL9PhTAXp\`S
dW
hUlYpc
tg
xk
|o
�m�w
�u�
��
�|��������������������������
��
��
��
��
��
��
��
�����X��
��
��
!%�� $�(�,0 4$8(<�@0D�H8L<P@TDXH\L`PdThXl\p`tdxh|l�p�t�x�|��������������������������������������������������������������� �$�(	,40�4�8c<8@<D@HDLHPLT�X�\Z`^d`hdlhpltpxt|���E���������~�a�8�<�@�D�H�L�p�"�$��`�d�h�l�p�t���������m�L� ��8<@DHLY ]  N$R(P1,2024282<2@2D2H 2L$2P(2T,2X02\42`82d<2h@2lD2pH2tL2xP2|T2�X2�\2�`2�d2�h2�l2�p2�t2�x2�|2��2��2��2��2�p-��2��2��2�,��1�h*��1��2��2��1��2��2��2��2��2��2��2�2�2�2�2�2�2�2�2 �2$�1(3,3034383<3@3D3H 3L$3P(3T,3X03\43`83d<3h@3lD3pH3tL3xP3|T3�X3�\3�`3�d3�h3�,1�0�t/�H/�|3��3��3��3��3��3��3��3��3��3��3��3��3��3��3��3��3��3��3��3��3��3��3�3�3�3�3�3�.@3�P �R$�P(�R,�P0�R4�P8�R<�R@�RD�RH�RL�RPdQT�RX\Q\S`TQdShLQlSpStSxS| S�$S�XQ�,S�PQ�4S�HQ�<S�@Q��_�c�	`�c�c��`��a�k_� c�$c� ^�,c�(^�4c�8c�4^�8^�<^�@^�D^�H^�L^�P^�T^	X^	�_	hc	�a	pb	`	xc	|c 	�c$	�c(	�`,	�c0	�c4	�c8	�c<	�c@	�cD	�cH	�cL	�cP	�cT	�cX	�c\	�G`	�cd	�bh	�cl	�cp	�ct	�cx	�c|	�c�	�c�	�c�	�c�	�c�	�c�	�c�	�c�	�c�	d�	d�	d�	d�	d�	d�	d�	d�	 d�	$d�	(d�	,d�	0d�	4d�	8d�	<d�	@d�	Dd�	Hd�	Ld�	Pd�	Td�	Xd�	\d
`d
dd
hd
?c
�l
9l
�l
�< 
�k$
|l(
oc,
�c0
�c4
�c8
�c<
�c@
�cD
�cH
�cL
�cP
�cT
�cX
�c\
�c`
�cd
�ch
�cl
�cp
�ct
�cx
�c|
d�
d�

d�
d�
d�
d�
d�
d�
d�
&d�
*d�
.d�
2d�
6d�
:d�
>d�
Bd�
Fd�
Jd�
9d�
Rd�
Vd�
@d�
^d�
bd�
fd�
jd�
nd�
rd�
vd�
zd�
~d�
�d�d�d�d�d�d�dd�d �d$�d(�d,�d0�d4�d8�d<�d@�dD�dH�dL�dP�dT�d p ���	L �P
(`h �`L� �0"��#@%�P}P`�.symtab.strtab.shstrtab.group.rela.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.rela.rodata.rodata.str1.1.rodata.str1.8.comment.note.gnu.property.note.GNU-stack.rela.eh_frame@-P.`/h0'pt�"@(� -�3�=�T8@H�0	S8�b8�(]@x�0|`��`�(�@��0���v �@��D�2�K�2h�%�0��-|��S�����P���`@�
�p��B	(��hpcre_config.o/  1575493209  1667  135   100644  4392      `
ELF>�	@@
���������w#H���Hc�H�>���H����1��fD�1����
1����1����1��
GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864GCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�zRx�qq3Nm�����	�q.annobin_pcre_config.c.annobin_pcre_config.c_end.annobin_pcre_config.c.hot.annobin_pcre_config.c_end.hot.annobin_pcre_config.c.unlikely.annobin_pcre_config.c_end.unlikely.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..grouppcre_config
�������� q  8L`T8LPD � .symtab.strtab.shstrtab.group.rela.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.rela.rodata.comment.note.gnu.property.note.GNU-stack.rela.eh_frame@P`
h'pq"@X-�3�=�T8@p0	S8b8(]@�0|`�`(�@�0��$�@��0�-|�S���P�0�00�@�`�	H	��pcre_dfa_exec.o/1575493209  1667  135   100644  46000     `
ELF>p�@@
AWAVAUATUSH��H�$H��H�$H��H��$� H�T$XI��H�w ��$�A�UH�|$(��$� L��$�H�\$hdH�%(H��$x 1�H�GH�t$xH�sH�t$`H�D$PH�GH�D$@H�H�D$ �G0%�D$H��$� ���D$t��$� ��H�H��H��$��D$H�H��H�_H�D$8����b��	�<H�H��A�|\��|$t��<H�t$8L�D$ ��|$1҃���@DL���FH��L)‰N��D�ډF�A�Ef����IŸA�}T��D�Z9�|��D$L��H��$x dH3<%(�D$L�eMH�Ĉ []A\A]A^A_�H�|$(�G.�X���f�A�Ef����I�A�}Tt�H�D$hHcXD� I��E���#H��H�t$`H�|$8�$H��D�L$�D�$D�L$�L��1���Bf����9�L��Bf����H€:Tt�D�t$HE����H�|$XHc�H��H)�H��H�\$PH9���H��H)؉�H�H)�H�|$XH�D$(H�|$XH9xvH�xH�T$8L�D$ E1�D�T$�A�Ef����9�|3L��A�[L)ǃ�E9���������BA��H���z�)ljJ��z�A�Ef����I�A�}Tt�Ic��Icۋl$H�D$L����L��$����D$s�D$sA��D��$���$�H�D$xH��@HH��$�H�D$XH��$�H�D$1�H�|$h��H�t$@H��H\$8�D$0�����H�D$I��D�_�D$H9�s-�0D�T$H�D$��t$0E��t�����fDE�������E1�E��L�t$`E1�D���D$4M��L�T$8DŽ$��=��B�����M;l$�K����R��A�A��E�n��I��A�V�A�F�E9�~RIc�H��LҋD�j��x�E��t{A�O�L��H��I�L
�f�H��H9�tW9u�z9xu�A��E9������IHcD$H�T$8Hc�H�|$`HD$I��H�D$hH�T$`H�|$8����f.�Hc�HD$ D�D$H�D$��$E��uH�
�<�DL$4�L$4H�5���tSHD$�|$HA��0��t���wd�<$8~5�F�w-H�
Hc�H�>���D$L��
���������E1��$=�w�H�=Hc�H�>��D�ۉ�����?H�=L�
�<�A#4�I�����tE1�D�x��H����?��	�A9�u�A���M������q:L�D$PH�T$XI9��^:1�H�B�I9�s4�R�����t�#fD�������#H��I9�u�H�D$P�wH�D$X� ����$|�����$h�����$T����$,����$@���H���?�H��@#4�H��ɉ���A���D$0��t&H�D$1�H�P���H����?��A	�9�u�D�D$0�G�D$����Hc�$�HD$PH9D$��#A���'���H�t$H9t$P��$A�������<$�t*A�D$D;d$�!����KE�kA��I��A�K�A�C��D$����'�D$0=��C�Q.��
������1Ƀ���9���4A������A�T$D;d$�����D�L$�KE�kI�{A�A�CE����/�L$0�����:�T!�L$0��( �����Ƀ�@��@��9��~"�$����MAD�LE�;l$�+����E�nA����A�F��I��A�F�A��A�F������<$�t*A�D$D;d$����KE�kA��I��A�K�A�C��D$���{&�D$0��
�O?��-1Ƀ�
��-�<$�uA��I��H�D$�R�u�@��f����9���9;l$�d��������E�nA����)�A�FI��A�F�A�N������A�t$D;d$�%����SA�CI�KA��T$E�k���r.�|$0��
�?4�m 1҃�
� �<$����}AD�ID�;l$�����A�FA����E�n��I��A�F�A��I��A�V��k���A�L$D;d$�����A�DD�D$E�kI�{��$�A�A�CE����'A�����A��tP����9�|$s��9��L�
��A�A��A����D�L�
E�AH�F�D�A�E��D�D$0D9���E9�@��	�<$+@��@8���.A��A��I������R��~,A�D$D;d$�����A�LE�kA��I��A�K�A�C��D$����$�����A��tH���:>�|$s�/>��H�
���������H�
�AH��|�����|$09���9���	ȃ<$+��8��
3A�����A�T$D;d$����A�DA�CI�KA��D$E�k���K#�����A��tK����;�|$s��;��H�=����������H�=�<GH�D�L�A�D��D�L$0D9���D9�@��	�<$+@��@8��4A��A��I���&���H�t$�Ff�$f������$��F�$<S���$�B����ED��h�,#H�D$��f�@f��$�f�����E4f����D��$� A�D$���!7D;d$����E�kA��I��A�[�A�C�A���v���A�T$D;d$���H�|$��E�kI��A�[�A�C��Gf�$f����H��8T�A��9T$�M��$��KH��H+T$ E�kI����A�C�A�S��Pf����HЀ8T��*A������H�T$A�L$D;d$���H��H+D$ E�kI����A�C�A��A�C��Bf����H€:Tt�A�����H�T$L��$�H��A�L�T$H��H+L$X��$� P��$�PAUh�H��$�PH�t$8H�|$XL��$`���H��0L�$L��$����������2�D$L�4�H�D$�Xf��$�f����HÀ;Tu�Cf����HÀ;Tt�H�T$L��$�H��A�L��$�H��H+L$X��$� P��$�PAUh�H��$�PH�t$8H�|$XL��$`�(�H��0��������$��L��$�L��$����X��8�t,D;d$�`�H�CE�kA��I��H+D$ A�C�A�C�A���
���H�t$H9�$��!A�D$D;d$��S�<$UA�CA�E�k��7A��9D$���H�D$I�� A���@A�C�E�k�f����)�A�[���D$���	"�D$0=��$��$�� ��$��&�D$0=���$=���$�E;l$�c��E�nA��I��A�^��A�F���t$����!�D$0��
�Y7�e%��
�$�E;l$���E�nA��I��A�^��A�F���D$���!�|$0H��������H����H�|$�BH��H����2�WH�
Hc�H�>��D$�����E;l$�o��E�nA��I��A�^��A�F��!�D$���FH�D$(�p4H�P���K.H;T$�2�E;l$���E�nA��I��A�^��A�F����|$���] �|$0��O �D$0Hc$H�
H��$��"H�
:�=1�E;l$�����E�nA��I��A�^��A�F��P�t$�����D$0=�w*Hc$H��$�H�
�"H�
:�7�E;l$�.���E�nA��I��A�^��A�F����H�D$H9D$Ps?H�p�H�D$(H9pvH�pH�D$�L$H�@��…���H��$�����u1ҋD$��t�D$0=�wH��$��у��Eу��<$��8��A�D$D;d$�v���E�kA��I��A�[�A��A�C��'�<$�t*A�D$D;d$�:��KE�kA��I��A�K�A�C��D$�����D$0=�I-�\#�� �:-��#�|$0����t1������1Ƀ���9��0+A����B��~*A�T$D;d$����KE�kA��I��A�K�A�C��T$�����|$0�����1�2!�W�������1Ƀ���9���&A����R��~*A�D$D;d$�,��KE�kA��I��A�K�A�C�D�L$E���h�D$0=�..���� �.���|$0����t1������1Ƀ���9��[)A����1����A�L$D;d$���D�D$�CE�kI�{A�A�CE����"�D$0=�2*�I�D$0=/ �*��D�L$0�A��_ t1�A��0����@��@��9��2�$�u-���AD�LE�;l$����E�nA����A�F��I��A�F�A��A�F���1�����H�t$H9t$P�ZA����H�t$H9t$@��H�D$(�p4H�P����,H;T$�/-A���;�H�D$�PA�D$D;d$�N���A�SA��I��A�[�A��A�C���H�t$H9t$@��A�D$D;d$����E�kA��I��A�[�A��A�C���D�L$E���H�D$(�p4H�P����+H;T$��5A�����|$�����|$0H�H�
H�5H��$�����H��$���H��$�����H�=�AH��$��D��<��Y)LcD$E1�LD$L9D$@��&D�<$�[H��$��Љ�����H��$�����FH��$��D�H��$��<��L&Hc�A��I�L9D$@�7&A���Ё��v�H�5��?�H�5�@#�H�����tI�pE1����H����?��	�A9�u���G����T$���j�D$0=�t���
����E;l$�5���E�nA��I��A�^��A�F�����L$����D$0=��'���
����E;l$�����E�nA��I��A�^��A�F����D$�����D$0=������ �x�i�D$0=��c=��XA���8��D$������A��tH��$�����|$09��9)9��1)�E;l$�!�A�TE�nA����A�I��A�F�����D$���"D�L$HE���L!;t$0��0�D$0���0H��$��9���/A�����D$���#;t$0��E;l$���A�TE�nA����A�I��A�F��>�H�D$(�@-�D�D$E�����@0 �_*H�D$(�p4H�P���(*H;T$��5A������A�L$D;d$���D$A�|E�kI�SA�;A�C���!A�����A��tO���:+�|$s�/+��L���A�A��A����D�L�E�@H�B�\��A�؋\$09���A9�@��	�<$+@��@8��A$A��A��I���&��<$mt*A�D$D;d$�9��KE�kA��I��A�K�A�C��L$���'�|$0H�
�������H�
���H�|$�AH�H���O���/�G����.���+H�|$�@88����1Ƀ���9���!A���`��A�T$D;d$�w�D�D$�CE�kI�{A�A�CE���5D�L$0L�D��A����A�L���D�L�L$E�@H�A�yJ����%E�AL�
Oc�M�>A��R��~*A�D$D;d$����KE�kA��I��A�K�A�C�D�D$E���]�|$0H�
�������H�
���H�|$�AH�H���O���a(�����-����H�|$�@88����1Ƀ���9���A����1����H�D$H�T$L��$0L��$�L�$H��A���pL��$�H��H+L$Xf����Ht$(��$� ��P��$�PAUh�H��$�PH�|$X�?�H��0����1L��$�L��$��6����R�A���R��$��P�PH�|$�t$L�G!���c�|$0��o"��O����A�0L��H+|$ ����F@����'H�=@��Hc4�H�>��|$���l�B�|$0��$���������	�vA����E�L$D;d$����C�L$A�C��$�A�I�CE�kH��$������B�|$0���$����������	�A��E��L��$��*�E�L$D;d$�F��CA�CI�[�D$A��D$E�k���q�|$0��*���!���	��A��E��I�����D�JE��~*A�D$D;d$����SE�kA��I��A�S�A�C��D$���N�|$0��@���7���	��A���W�E�L$D;d$�s��CD�D$A�CA�I�CE�kH�D$E���"�|$0����������	��A��E��L�\$���D�L$E���s�����A��tH����%�|$s�v%��H�
���������H�
�AH��|�����|$09���9���	ȃ<$+��8���A���a�H�T$�Bf����H€:Tt�A�D$D;d$�e�H+T$ E�kA��A��A�I��A�C���H�H����H�|$H�t$PDŽ$�L��$��GH��$L��$���$�H��$�H��$�H�D$@H)�$H�D$XH)�$H�D$H)�$�Gf�$f������$(�GH��$�f������$,H�����H��$H�D$(H�@@H��$ ��L��$�L��$������>A�T$D;d$�9��E�kA��I��A���A�C��A�[�����<$�t*A�D$D;d$�����KE�kA��I��A�K�A�C��D$�����t$0H�
L�
H�=L��$����H��$���H��$�����H�5A�AH��$��D��<��zHcD$H�t$�<$�L�uA��I���$L9L$@��D��$��p���HH��$�H��$�I��Hc�����������)�H��$��H��GH��$��D��<��qMc��$M�L9L$@�\A�A���=��y�����?H�5H�=�4�v#�I�����tI�yE1��7��H����?��	�A9�u�A���-����D$A�T$D;d$�F���t$�CE�kI�KA�A�C���(�t$0L�L�
H�=L��$����H��$���L��$�A����H�5A�AH��$��D��<����$LcD$LD$-���DE�LE�1���L9D$@�}D�<$A��L��$�L��$�L��$�L��$���$�H��$���$�H��$��S���P��I�������Hc҃�A�)���H�A�C�D��|���Hc�A��I�L9D$@��A����=�~�H�5��?�H�5�R#�H������t#I�pE1�fD���H����?��	�A9�u���J���A���`��H�t$�Ff�$f����H�T�:Tu�Bf����H€:Tt�A�L$D;d$�K��H��H+D$ E�kA����A�CA��I��A�C�������$�A������A�L$D;d$����H�|$��A�CA�E�k�Gf�$f����H�T�:Tu�Bf����H€:Tt�A��9L$����H��H+D$ E�kA����A�CI�� A�C��Z���R��~*A�D$D;d$�o���KE�kA��I��A�K�A�C��L$���K�t$0��
�� ��1���
����~�<$�uA��I�����M;l$�����A�VA��I��A�^��E�n�A�F�����B�D$��~*A�D$D;d$�����SE�kA��I��A�S�A�C��|$���s�|$0H�H�
H�5H��$�����H��$���H��$�����H�=�AH��$��D��<��E�t$LcL$LL$��~�<$�uA��I���$L9L$@w|�>����P��H��$�I�������Hc҃��)�H��$����H��FH��$��D�H��$��<���Hc��$I�L9L$@��A����=��z���H�5��?�H�5�R#�H������tI�qE1����H����?��	�A9�u���/���1�����D$������
����������� ����D�L$0�A������1�A�������rjH�|$(�,���|$�������A��A��I������H�ƒ�I9�s9��5��H�T$X�������|$0���t�r�������v�A��A��I���/��- �������&���	������D��L��$�D�<$L��$���$���$��U;l$����D$E�nA��I��A�F����A�v���A�F����A��A��I�����A��A��I�����A�D$D;d$������E�kA��I��A�[�A��A�C��`��A�D$D;d$�|����E�kA��I��A�[�A��A�C��-��A��A���!��A���;H�D$(�p4H�P���H;T$�?$A������H����Ѓ�<�t���=��'���?H�=L�
�<�A#����tH��1����H����?��	�9�u�=�������A�D$D;d$������E�kA��I��A�[�A��A�C��L��H�H����H�t$H�|$PDŽ$�L��$��FH��$L��$���$�H��$�H��$�H�D$@H)���$H�D$XH)���$H�D$H)�H��$���$�Ff������$(�Ff�$f������$,H�����H��$H�D$(H�@@H��$ ��L��$�L��$����Q����HD$H�|$�G�$�;��A���.���D�H�<$A�����D�KHc���A)�H��H��H�H��(H)�H�$HcH�\$PHcwH��H�H��H�)�H9�sH���P���������)�H9�u���!�E9l$�'��E���I��A�F�E�n�A�N�H��H;<$u��%��$4��$0H�t$��)��Ff����H��<Tt�I���VL+L$ H��E�A�����<w�CD��f����)ƅ��D;d$��E�A��I��A��A�C�E�k����H�D$�L$�pA��f�4$fA��E��I�����A�D��+L$ ��F<�H�5��Hc�H�>��A�����A�����A��A��I�����A��E��I���t��A��E��I���e��A��A��I���V��A���M��L�L$E�ID8H������@��@��9��A��A��I�����A�����A�����A�����A��E��I������A������A������A������A������A�����A�����D;d$������$�E�kA��I��A�C��DA�C����A���{��A���r��A���i��A���`�������L�L$E�ID8��������A���5��A���,��A���#��H�D$(�@,����A�D$D;d$�0����E�kA��I��A�[�A��A�C�����A������A������A������A�����D�HH�D$L��@C9������`���D�@A�@�<��A��A��D	����?���H�\$H9\$Xr1H�D$(�@,�����tH�D$(Hc@(HD$PH9D$X���|$L���D$L�������D$L�;�$�����H��$�H�L�\$H��L�$H�~�L�\$L�$��$�~"H�|$PH�D$XH��$�H)��H�D$H)��FH�D$(�@.����A�����A�����A�����A��A��I���y��A���p��A���g��A���^��A���U��A���L��1�A�p�Rf����9�*A�t$D;d$�S���yE�kA��I��A�{�A�C�����A�@���uf������t9���;l$�	��A�A��I����A�V�E�n�����1�A�T$D;d$������A�CI��A�K�E�k����G�E;l$����A�A��I����A�F�A��E�n��d��1��R��~*A�t$D;d$�w����E�kA��I��A�K�A�C��������E;l$�E��A�A��I����A�V�E�n����1�A�T$D;d$�����A�CI��A�K�E�k������E;l$����A�A��I����A�F�A��E�n����H�t$�v@8p����1҃<$��9��	A���s���HH�D$H��@9��������P�B�<������	���뮸�H�t$�v@80����듃�	�����z��D$0=/ �h���|$0���_ �P�1���0���@��`H�D$(�@.�����A������D$0-( �����A������D$0=/ tv�D$0=_ t=0�5��A���p��- ��
����������u��H�A�<������	����������u��H�A�<������	�������|$0�����������������	����E;l$�����E�nA��I��A�^��A�F�����D$0=/ t����D$0=_ t�=0�R�먋D$0=������r-( �������A���i���D$0-( ���������r7H�D$(�@,�D$%��:�A���0���D$0=�t�r
-( ��v�A������$-n��EAD�LE�;l$����A�FA��A���I��A��E�n�����r=H�D$(�@,����$�����:��A������D$0=�t�r
-( ��v�A�������	�8������D$0=/ ��	v3�|$0���_ ����1���0�������	�������- �������x���D$0-( �����A�����- �������Z��- ��
������A��A��I������A��A��I�����E��~�<$CuA��I��A���E;l$����A�A��I����E�N�E�n����A��A��I���t��A��A��I���e��A��A��I���V��A��D9��2���p��DH�D$�$�M�@��f����9��
;l$�@����E�nA��I��A�^��A�F�����A��A��I�������|$0��p�����D$0H��$�H����"2H�:2�a���A�����A�D$D9d$�	E�A��I��A�C�E�k����A�D$D;d$������E�kA��I��A�[�A��A�C��9��D��$�H�D$�R�M�@��f����9���;l$�2�������E�nA����)�A�FI��A�F�$A�F������$��+����8��I��H�D$�R�u�@��f����9���;l$������$�E�n��A��A�FA��I��A�F��m����~�<$ouA��I�����E;l$�t��A�A��I����A�V�E�n��.���$��$��M��EH�D$LE�$�ED̃��@f����9���;l$�����$�E�n��E��A�FA��I��A�F������|$0�����Q�D$0H��$�H����"2H�:2�R���A��E��L��$��l��H�|$x�D$0���47@84��A���J����~�<$�uA��I�����U;l$�Q����A�FA��I��A�^��E�n�A�F�����$��D�EED�ID�;l$����t$E�nA��I��A�F���E��I��A�v�����|$0��h���m�D$0H��$�H����"2H�:2�y���A��E��I���j���$��BLE\$�EED�L�\$;l$�t��A�A��I����A�F�E��L�\$E�n��"���|$0�������D$0H��$�H����"2H�:2�u���A��E��L�\$�����<$�uA��I��H�D$�R�M�@��f����9���
;l$���������E�nA����)�A�FI��A�F�A�F��d���E;l$������E�nA��I��A�^��A�F��5���|$0������	�D$0H��$�H����"2H�:2�1���A�������<$quA��I��H�D$�R�M�@��f����9��~;l$������E�nA��I��A�^��A�F����L�L$1�I�yH9|$@����1�A�y
������T$0D�<$�E;l$�}����A�FA����A�I��E�n�E�N��-����~�<$�uA��I�����E;l$�4����A�VA��I��A�^��E�n�A�F�������~�$��)�	��6�	�B�U;l$����A�A��I����A�F�E�n�����D$�U��;l$����A�F�$��I��A�^�A����E�n�A�F��h���<$�uA��I��H�D$�R�M�@��f����9��~;l$�X�������E�nA����)�A�FI��A�F�A�F���������$��*t��7DE�LEڍE;l$���A�>A��I����A�F�E�n����H�|$(H�t$HcO8H)�H9������<@8>������tH��H�t$(�v=@8p�~��A���k��H�D$�R�M�@��f����9���;l$�l���A�DE�nA����A�I��A�F�����$��(t��5DE�LEٍE;l$�)���A�A��I����A�F�E�n�����A������1��
��A������A�����A������t$0H�|$������������v�������A���}��H�t$��H��H��:TH�T$u�Bf����H€:Tt�H�T$H�T$L��$�H��A��L��$�H��H+L$X��$� P��$�PAUh�H��$�PH�|$XL��$`諾��H��0��������$A�L$L��$�L��$����X���8��aD;d$�ٿ��H�D$H��H+D$ A�A��I��A�C�E�k��������q��;l$������$��A�VA��A���I��E�n�A�F��M��;l$�o���A�A��I����A�V�E��E�n��&��1��<$�����;l$�:���A�A��I����A�V�E�n���H�D$(HcH8H��H)�H9T$�x
A������A������H�D$(Hc@8H��H�H9T$�%A�������h������;l$�������A�VA��I��A�^�E�n�A�N��f���;l$�����A�A��I����A�V�E�n��B���A���9���;l$�[���A�A��I����A�V�E�n�����H�D$(H�|$L�\$D��$�L�$H�H8�L�$L�\$�����A������D��$�H�|$H�H8L�\$L�$�L�$L�\$���3������A���D$���1��<$����D;d$�������$�E�kA��I��A�C��T3A�S�����L��$���E��k��H�D$(�p4H�P��t~H;T$���D��$�H�|$H�H8L��$�L��$��L��$�L��$������A���ۿ��H�D$(HcH8H��H)�H9T$��A��鸿��A��鯿��H�t$(H�|$HcN8H)�H��H9��S��v<@87�F����H�|$(H�D$�w=@8p�%�A���[���@D;d$�x�����$��D�����$��D$L�����D��D$L�
���$��D$L�+���$�������D$L�2��xH�D$H�
�@9������������;l$�ڻ����A�VA��I��A�^��E�n�A�F�鉾����H�|$(H�t$H��<@8<��	A���d���H��$���D����A���G���H��$�����������	�E;l$�K���A�>A��I����A�F�E�n�����H�|$(H�t$HcO8H)�H9�����<@8>�����tH��H�t$(�v=@8p���A�D$D;d$�ۺ����E�kA��I��A�[�A��A�C�錽��H�D$(�p4H�P���1H;T$���H�|$D��$�H�H8L��$�L��$�D�L$�D�L$L��$���L��$��d�A������D;l$�9�����A�VA��I��A�^��E�n�A�F����A��A���ܼ��A���Ӽ��I��A��A���ü��A��I�����H��$����<�I��H�t$�EH�VH9T$@v
�~
�	;l$�������E�nA��I��A�^��A�F��Z���H�|$1�H�OH9L$@�E��1��
���7��H�t$1�H�FH9D$@�����1ɀ~
�����A��E9�|j��uf�D$Hc�HT$���m���E�I��A�F�E�n�����A�2M�ZE1��A�BA�E�j�D$騻��A��韻��H�\$PHc�Hc�H�H�H9�sH���x���@���@��@��)�H9�u�E;l$����A�؃�A�FE�E�nA�V���9D$�������E�n��I�� A�v�A�F�A�V�����H��$������A�����;l$����A�A��I����A�V�A��E�n��κ��D����E;l$���A�TE�nA����A�I��A�F�閺���|$0H�������������H��BH�|�������H�D$(�p4H�P���2H;T$��D��$�H�|$H�H8L��$�L��$�D��$��D��$�L��$���L��$����A��E��L��$��ڹ���E;l$�����A�TE�nA����A�I��A�F�駹������H�|$�@8x��������xH�D$H�
�@9���������H�|$�@8x�����0��A��A���?���A���6���H���|$0L��$�H��L��$�H�T$L�$�H�T$L�$L��$�L��$�����H�D$(�p4H�P����H;T$�f�D��$�H�|$H�H8L��$�L��$�D��$��D��$�L��$���L��$���A��E��L�\$�q���f.��E;l$�������E�nA��I��A�^��A�F��8���H�D$(�p4H�P���uH;T$��D��$�H�|$H�H8L��$�L��$�D��$��D��$�L��$���L��$����A��E��I��鼷��H�D$(H�|$L�\$D��$�L�$H�H8�L�$L�\$����H�D$(Hc@8H�|$@H)�H9|$����A���b���H�|$(H�t$�<@8>tA���F���A��E��I���7�����t�H�|$(H�t$�=@8~t�A������H�t$(H�|$HcN8H)�H��H9�����v<@87�������H�|$(H�D$�w=@8p���A��E��L��$�鶶���H�t$(H�|$HcN8H)�H��H9�����v<@87�����t;H�|$(H�D$�=@8x�h�A��E��I���Y���A��E��L��$��E���A��E��I���6���;l$�X�����E�nA��I��A�N�A�F��
���A������f.��|$t� ����D$4����H�D$h��$�9X����H�D$(�@,�u�Ā����|$L��ݲ��H�D$H�t$@H9��ʲ��H9D$X�������$��D$L������H�D$(H�\$PH��$�H�@H)؉H��H)؉G進���H�t$(H�|$HcN8H)�H��H9��i��v<@87�\���t!H�|$(H�D$�w=@8p�?�A�����A���������۾�������H�|$(H�t$�=@8|���A��鹴��D��$�H�|$H�H8L�\$L�$�L�$L�\$���������A���{���@A���n���A���e���A���\���DH�D$(H�|$L�\$D��$�L�$H�H8�L�$L�\$��u	A������A�����H�D$(H�t$@Hc@8H)�H9t$���A�����H�|$(H�t$�<@8>tA���ҳ��A���ɳ����t�H�t$(H�D$�v=@8pt�A��駳���D$L���ΰ��A��E��L��$�醳���;l$�������A�FA����A�I��E�n�A�F��M����D$��;���I�����0���A��E��L�\$�����H�t$(H�|$HcN8H)�H��H9�����v<@87�����t:H�|$(H�D$�w=@8p�f�A��E��L�\$鿲���A��鮲��A��E��L�\$靲��fD��AWAVAUA��ATUSH��H��(H��$`H��$pD�L$D�D$(H�L$H�|$ dH�%(H��$1�A��oZ�D�L$h�,H��H����H������H����H��u��$h������$h�����$x��HDŽ$H�C H��$�H���H�D$0H��������tH�VH��$�tH�F H��$��;ERCPI���lHcD$(�|$Mc�N�dL�|I��H�D$`��I�G�H�D$8A�FH��$��Ɖ�L��$����t$@�ƃ���@��	�A�N�t$\A�v�D$Y��A�vD��$���$���$Hc�H�H�H��$�������L$%p��pE�= �]
vO=@��=P��	=0��	���H��$dH3%(�b
H��([]A\A]A^A_��
=u�DŽ$�
DŽ$��$�������N���	���$��@���L�d$0L��$�H�t$pH��L��L���I��H����	M��MD�L�|$0�W���H�NH�L$0�����;ERCPI��H�D$0�1����DŽ$�\$��t�D$ ��H��$�H�D$PH���HA�F�Ã�f�\$ZA�^���\$��u$��
f�|$ZuH�\$0H��t
�C���D$D�����H�D$H������\$l�\$@�t*A�V����H�؉\$@H�\$P���T$h���D$l�D$M��H��$�M��%�D$,fDD�\$,E���jD�T$M��E����H��$�D�L$M��H��E����I9�r9�nfDHc�$H��H)�H��I9�w��$A8<$��I��L9�v7��$��t�E1�H�K8L�����uH��$�I��L9�w���D$�Z�����D�D$DE���3M9��:A�E9�u�,@A�E9��I��M9�u�f.��D$��RH�|$0H��t�GtL��L)�9G(w|�L$@����*L��L)�H=��1������L�H9D$8��t$h����I9�v6�H�pH�t$89���H�D$8�t$@��H�P9���H��I9�u�����s����I��I9������$��uiHc�$H��H)�H��I9�w��$A84$��I�D$I9�v�A�L$����t�������u�H��I9�u�M����H��H�K8A�L������S���H��$��H�D$8@L��$�H��L��H��jjA�F��P��$�P�t$HD��$�L�D$H�L$XH��$踧��H��0����m����|$Y�b����T$��t(��$H��$�����I9��W��D$M�l$���M9������A�}�
����M9�����A�}
����A�F �������$����v��$�����D�\$,I��E�������DM����f�|$Z��H��$�H�D$`H�I9�vw�|$����M9�r$�d@L9��I��M9��B���H��$���$��u�Hc�$H�H��I9�r͉���$��Hc�A8Lu�����DM����M9����A�D$��<�t����DA�E��<������I��M9�u�M���������?�����$
A8D$����'������������$
A8D$�S�������Hc�$H)�I9��4�����$A84$�"�����������$
A8D$����������.���@M9�����A�EH�T$P�9�u���A�E�9�����I��M9�u����D�D$\H�K8L�����������H���H�t$HH�������M9������A�EH�L$HH�ƒ�H�������s(�l���@A�EH�ƒ�H��������I���I��M9�u��3���E1�H�K8L���������A�}�
������$��������M9������E1�A�}
A��M�����I9��|���H�p�H�t$8;T$@����;T$l����H�D$8�t$@�|$l��H�P9���9����H��I9�u�������H�H��$�H�D$P���M9��e�����$��tBL9���I�EI9�v'A�U����t�q������ueH��I9�u�L��I������Hc�$H�H��I9�r�����$��Hc�A8Lu��������$
A8Du�����f.�H��$�I���S���H�K8A�L������M����u���A�V������D$DH�D$H�����$
A8D�����Z���H�H��$�
f��$�;���DŽ$�+���D��H��T$������D$(�T$������D9�����A���������<��������������DH�\$P�D$DH�D$H�,+�=���DŽ$�
���H���D$D�����H�\$H�
������������������������x���������n���������d�������Z������P���
GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864GCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�zRx���QB�B�B �B(�A0�A8�G� L�@L�At
8A0A(B BBBA+�A[�AH�AB�AE�AI�A[�Aj�A^�AH�AB�AE�AI�A[�A��Aq�AH�AB�AE�AI�AN�AH#�A^�AH�AB�AE�AI�AV�Ah��F�B�B �E(�A0�A8�J�
8A0A(B BBBA!�H�B�H�H�D�b�r^7Tu���Q��s�@s�����0	AW^p��������/�Q�=Od.annobin_pcre_dfa_exec.c.annobin_pcre_dfa_exec.c_end.annobin_pcre_dfa_exec.c.hot.annobin_pcre_dfa_exec.c_end.hot.annobin_pcre_dfa_exec.c.unlikely.annobin_pcre_dfa_exec.c_end.unlikelyinternal_dfa_execpoptablecoptabletoptable1toptable2.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..group_GLOBAL_OFFSET_TABLE_memcpy_pcre_utf8_table4_pcre_utf8_table3_pcre_ucd_stage1_pcre_ucd_stage2_pcre_ucd_records_pcre_ucp_gentypepcre_callout_pcre_OP_lengthsmemmove_pcre_was_newline_pcre_is_newline_pcre_xclass__stack_chk_failpcre_dfa_exec_pcre_try_flipped_pcre_default_tables_pcre_valid_utf8$��������
�
<V
���������
 �%���������&���������%���������&��������>
'��������Z
(��������f
)��������'��������$(��������/)���������'���������(���������)���������'���������(���������)���������
(�
��
�
�+
��'���������(���������*��������)���������%���������&���������'���������(���������)��������g'��������z(���������)��������('��������>(��������U)��������p
<�'���������(���������)��������K
P� '��������� (��������� )��������5!+��������L",O�"'���������"(���������"*���������")���������#%���������#&��������V$'��������]$(��������d$*���������$)���������%%���������%&���������''���������'(���������'*��������()���������(%��������)&��������|+%���������+&���������++���������,,O�.
p�0*��������\1-���������3*���������8
��8
��:
��:
��;
��;
�K<
�[<
�8=
�H=
�RC.���������C/��������GD/��������oE*��������G/���������I'���������I(���������I)��������5J/���������J*��������6K0���������K/��������XL/���������L/���������N1���������O/��������P/��������YT3��������V/���������W/���������Z/��������x[.��������$\4���������\.���������]5��������<^1�������� r^  d���~�z~ �$d($,0p4w8{<@�DH�LP�TVX\�`�d�hvlpt�x\|�����p�"�u�3���������-�1�����p
�t
�� ��M���
���a�e� �$��
��
�-!�7��<�
� � q u    �$�(a,� 0� 4- 8�<|@�D�H�L�P�T�X�\-`1d5h�l�p~"t6"x�|��������������6���M��
���Y��
���������(�(�����(�$�(��'��'��'�8�<@DHLPTX\ `$d(h,l0p4t8x<|@�D�H�L�P�T�X�\�`�d�h�l�p�t�x�|������������������������������������������ �$�(�,�0�4�8�<@DHLPTX\ `$d(h,l0p4t8x<|@�D�H�L�P�T�X�\�`�d�h�l�p�t�x�|����������������������������������	�	�	�	�	�	�	�	� 	�$	�(	�,	�0	�4	�8	�<	@	D	H	L	P	T	X	\	 `	$d	(h	,l	0p	4t	8x	<|	@�	D�	H�	L�	P�	T�	X�	\�	`�	d�	h�	l�	p�	t�	x�	|�	��	��	��	��	��	��	��	��	��	��	��	��	��	��	��	��	�
�
�
�
�
�
�
�
� 
�$
�(
�,
�0
�4
�8
�<
@
D
H
L
P
T
X
\
 `
$d
(h
,l
0p
4t
8x
<|
@�
D�
H�
L�
P�
T�
X�
\�
`�
d�
h�
l�
p�
t�
x�
|�
��
��
��
��
��
��
��
��
��
��
��
��
��
��
��"��"�"�"��!��!�� �� �� ��"�;"��!�� �,�0�4�8�<@D,/0/z-~-	*
* _($c((g(,P/0�-4%*8w(<|@�D�H�L�P�T�Xu/\y/`)-d--h}l�p�t�x�|�/�I-�������������������7�;�����c�g�������[������� �$�(�,�0�4��������%)-� �$�(=,40�34�38�3<#4@�/D�0H�0L/P0TJ3XN3\�2`�2d�2h�2l2p2tH3xL3|�2��2��2��2�2�2 ��Q.symtab.strtab.shstrtab.group.rela.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.rela.rodata.comment.note.gnu.property.note.GNU-stack.rela.eh_frame@P`h'pr^"@�r�
-�^3�^=�^T8@@}0	S8`b8`(]@p}0|``�``(�@�}0��`� �@�}x-�0Si-|�iS�i��iP��i��iP�@H�0 k#	0pux��pcre_exec.o/    1575493209  1667  135   100644  64104     `
ELF>(�@@
L���Lc�1�I)�M9��hAWHc�AVAUATUSH�ALc$�L��A���"�Ad���ON�L9��H�-H�L�L�L�
�oDL��A�$M�l$��=��6M��9�tD���PA��I�A��A����Hc�A���B�(����D)��Hc�A�SAD�9���L9�s�>L�n�����~���?�T�R#<�D�r������g���J�t6L���H���B�����?��	�H9�u�K�t5�:����C�I��B:D�u(��D)����[]A\A]A^A_����[1�]A\A]A^A_�1��!@D�>L�AHE�<H��C�C8u�A��A)�E����fD��?D�tC�vB#�L���E�v���������O�|4M��@I��A�T$�����?��	�M9�u�O�d5�z���f.�AWI��AVM��AUATI��USH��xH��$�H�T$�L$L�L$(H�D$dH�%(H��$h1��D$0�D$4A�Fd�D$ I�H�PI�I;F��I��$�I;F��I��$���H�D$(E�,$H�$A��Hc�A��r�F!H�=A��Hc�H�>��f�A�l$f����D-A;F$��I�VLc�M��M��M��N�4�B�D2F���D$ A�D$ )�H�H��L��I+�$���b�0���ۉt$0A��$�ۉA��t$4��$����t$8H�5H�t$(H�t$(L�T$HM��L��D�\$@H���4�D$@PL�S�t$ L�L$ �L$<H�T$0�^���H�� D�\$@L�T$H��t=���uG�D$4A��$�A�Gf����I�A�<Tt�I�T$�D$ �\$0F��B�D2A�D$ )�H���1�H��$hdH3%(���H��x[]A\A]A^A_�DH�D$L��$�H��$�H��$�H�D$�<���f�A�D$f����I�E�,$A��Tt��!���A�D$f����A���`�E<T�`i�D$ H�5��$�D�l$ H�t$(�hHc�L��I��H��A�4M��L��UAUH�t$ L�L$ �L$<H�T$0����H�� ��t=������Cf����H��S�f���Ҁ<T��A���D�D$ I�N,L��L����������M���I�GM9��h*�|$ ����ME�l$I��I������fD�D$XA�D$f�����D$P1�A��&M�l$��1�D$hA�]D�d$ ��E��t����?M�e�$�`-��~VI���I��I9��ZrA�I�W9���I�׺D)��@M9���@I��A�G�9�����B�:9�}�;l$P���t$h���b��t$P9���zM���M9��r�A�9��gz���)�I�lL����L9��i�9���hH��H9�u�L$X���'4M��M��M��L9��-�����$�I��H�l$�XH��L��M��L��SI��jUL�L$ �L$<H�T$0��H�� �����M9�s�����DE�D$M�l$�D$X�D$hfA��A��l$P�h���M;���/@E��D$ I�wD�…�t
A�����^I�VPD����j���E�l$I��I������M;���CZE��D$ I�D�…�t
A�����[I�VPD�������I���E�l$I�����f�M;����YA��L$ I��Ѕ�t�����[I�FP�������DE�l$I��I���?���fDM;����YE��t$ I�D�…�t
A�����^I�VPD����H����U���DD�D$ I���E����KI9��� 1�M;���+KA�7���juI�FP���0�������I�t$1�A�<$u9��������E�l$I���v���DE�l$L�|$I���]���@I���I9���������I���H��� x�K���xH�HcS H�sI�~I���H����C$L�c�D$H�D$(E�,$H�$����M;����=A�D�D$ I���E��t=���]=��Y�����9��
���G�����f�A�D$I�T$�D$X�D$@�����/d�D$HI��A��D$P�D$8����|$@�}U�D$X�T$ �����C=<�'H���Hc�H�>��DM���D�l$ L��L)�E����HE�D$��M��A�����iH9��0�M�OA�A����]I�VHI�L$�B8����E�l$M��I�����f�M����T$ L��L)�����VH����VI�GA�A8\$�����E�l$I��I���J���f�I�GM;������A���M����}���H9D$�r���A�D$x���`������X���fDM���M9���;E��l$ I�wD�…�t
A�����YA��
�>\��7A��
���������A�^|�������������$�L��L�l$�hA��l$H��H�sM���ATL��L��jjL�L$ ���H�� ��������������K=�����K��t=����x����Cf����HÀ;Tt�I���$����
D��$�L��L�l$�hA��l$H��H�sM���ATL��L��jjL�L$ ����H�� ���L��t=������Cf����H��<Tt�I��<U������$��������A�D$E�l$<S��L�P�����D�P�����[��h�:EH��I�t$M��L����$��hUjjL�L$ �L$<H�T$0�J�H�� ���t��t=����0���D��f����L�H�p�8TI����SD�h���M;����9A�D�T$ I���E��t=��<U=������5�� �
�����5=������=�������f�M;���9E�I�D�|$ D��E��t
A����UTI�VPD����>����K���H��I�\$M��L����$�H�ލhUj�t$ L�L$ �L$<H�T$0��H�� ������fD�Cf����HÀ;Tt�L�cD�k���M;���_8A��t$ M�G�Ѕ�t�����U��H�5����H�=����H�A�|$�GH���i"A�D$H�Hc�H�>��@�C�H��,H���M�l$����\$h�D$X���%N�D$P��������M;���RE�D�l$ I�wD��E��t
A����:UI�VPD����f������M;����QA�D�L$ I���E��t=��,V=�����F3��
�������z�f.�M;���cQA�D�\$ I���E��t=���S=�9���3�� �*��t3��	�k�����fDA�v(M�������CM9��-�M��I�GA��$�H9D$�����`������DM;���O6A�?L�$M�G�T$ ��A����t����QI�D$M����VA�T$9��y�E�l$M��I���	��I�T$�Bf����H€:Tt�L�bD�j����H���5M��M�狄$�M��L��L���P��$�P�t$ H�T$0L�L$ �L$<���H�� ������D����f��C�H���M�l$���\$H�H���D$@����I�D$8���DM�eE�U�$�}M�FHC�A�ÉD$P�D$ ����!��~YI���I��I9���aA�I�GE:�8�I��)�� @M9���0I��A�G�E:��B�:9�}�;\$8��B�D$H����t�l$ ����B�t$89��2nM���M9���yA�E:�n���)�I�\L����L9���_�E:��bH��H9�u�D�T$@E����JM��M��M��I9��a��$��hH��M��L��H��Uj�t$ L�L$ �L$<H�T$0�0�H�� ���&�H��L9�s���fD�D$X��M�l$1��D$P�D$X�D$h��@M�l$��D$P����D$X�D$h�n�M�l$1��D$P����D$X�D$h�I�@M�l$1��D$8����D$@�D$H����@A�\$M�l$�D$@�D$Hf���ۉ\$8���DA�D$�D$@f�����D$81�A��3M�l$��1ۉD$H�n���fDA�n\I�������?I9�����$���M;�����A�v(���	ZI9����D�D$ I�N,L����������f�A�D$E�l$I��H�$���M���M9��X��[�M���M9��@�A�v(���	DM9��3�D�D$ I�N,L��L�������M���IcF,I)�M9������M���M9���KA��L$ M�_�Ѕ�t�����P��H�5����H�=H�����H��G�D��<����M9��S.L�d$8L�-L�=D�d$ L�t$@I���;������������G�D�A�<���-Mc�M�M9���-A�A���E��t�=�v���?E�D
C�@C#�M�����E��t(I�[1�f�D���H��A��?A��D	�9�u�A���f���fD�S�H�
��D�,H�
I�D$���\$P�������EӉT$8�L�`�S�\$X�\$@�����D$HE�����fDA�����D9l$8�	�D$P����bA����!#A���"hH�
D��Hc�H�>��A�D$M�l$1��D$@�D$Hf�����D$8�K���M�l$1��D$8�D$@�D$H�$���@M�l$��D$8����D$@�D$H���f�A�D$�D$Pf��D��I�D$D�l$8����A�D$�D$Hf�����D$81�A��@���D$PI�D$��S�\$X�\$@���V1�D�X�|$@L�`�@��E1�\$0�D$4����I�D$�D$8����D$H�D$P�f�A�D$L��f����H)��<`�:H�t$H�NH�6H�t$<_�)<<b�!<A��Ut	I9���pH�D$(E�l$I��H�$��f.�D�l$H�\$L�d$8��$��hH��I�t$M��D��UL��jSL�L$ H�T$0��H�� ���i?��t=������A�D$f����I�A�$<Tt�L��L�d$8<]t<T���f��Cf����H��<Tt�A���M���D�k�t$M9�t<U�zL�cM����A�D$�T$ f���ȅ�u6��I)�M;���P�M9��vM���E�l$I������I�׃����t�I�W�I;����A�G���<�u��H�����<�t��Ic��I��I9�������@H���5M��M�狄$�M��L��L���P��$�P�t$ L�L$ �L$<H�T$0��H�� �����M��������q��H���5M��M�狄$�M��L��L���P��$�P�t$ H�T$0L�L$ �L$<�$�H�� ������D����I�\$H����Bf����H€:Tt�H��H�rM��L����$��hUj�t$ L�L$ �L$<H�T$0��H�� �����E�l$I���B��A�D$f�����A���A;V$��7A�FX�2���f��$��8M���M9��gKA�v(����[IcV,I)�H��M9��7�A�^@A8�)�����A�FAA8G����@A�D$M�l$f����I�A�$<F�d6<K�~@<M�T6��A�\$���D$HA�D$f����f��f���и���E�I���D$8���~?M;����DD�T$ �E���D$@�D$@L�d$@M��A����1@M��L������M�A��A9��?M9���c@A�<$M�D$�����v�@��t���?H�5D�H�5C�IB#<�L���E�I���t�K�tL��DH���B�����?��	�H9�u�O�d�b����A�D$�D$H�D$Pf�����D$8I�D$�`���DI�D$�D$8�D$H�D$P�9���fDH���5M��M�狄$�M��L��L���P��$�P�t$ H�T$0L�L$ �L$<��H�� ������D���f�M��M��I���M��A�Gf����L�41�L9�tA�Ff������$�I��$�H��$�IcT$ I��$�H��$�I�GH��$�H�Љ�$�H�����P4L��$�L��$�L��I�t$1���D$H�ߋ�$�L��L�|$(I��M����$�A�<`@�Ń��t$ H�5�I��H��A�4I��L��D$(L�PU�t$ L�L$ �L$<H�T$0���H�� ���z9H��$���t=�����=Hc�$�H�{L���H���A�Gf����I�A�<T�v���H��$�L�|$(H��$�H���L9��p��1��e�A�\$f�����9\$�_+I�VHc�H�������E+�l
)�E�l$A��F�:A��K�H;A��M�	:�����D$PA�D$f�����D$8A�D$f��f���и���E�I���D$H���>E�,$�W�fDA�D$!M�T$<F�R;<K�;<M�B;��A�l$"���D$@A�D$$f����f��f���и���E�I��&�D$8�D$ ����9��~{I���I��I9���eA�I�OH�ƒ�H����A�����I�ϹD)��0M9���I��A�G�H�ƒ�H����A������B�99�~�;l$8�����D$@M��M��M����O��$�D�p�D$ ����D)�L�T$ H��M��L��AVL��j�t$ L�L$ �L$<H�T$0�e��H�� ���[�B�D-9D$8�J�M;�$�L�T$ ��$I��A�E�H�ƒ�H����A���r���f�H�
H����DŽ$�A�D$L��H��$�I�����$�I�FH��$�I���H)�H)�H��$���$�H�T$��$��\$H)‰�$�A�D$f������$�A�D$f������$��������$�A�����$�I���H��$��х��5��1�E�l$I�����<T��e�D$ ����������D$8D9���[M���L��M9��nSL�|$8H�=A��H�5L�d$@H�D�d$4L�t$PD�t$0�ND��A�������D��G�D�D9�����D9���"Mc�A��L�E9���"I9���RD�EA�D��A���v�H���?�H��@D#�I���A��t%L�U1��A���I����?��A	�9�u�A���H����D$8D9���ZM���L��M9��R2H�L�|$8A��H�=L�d$@H�5H�D�d$4L�t$PI���U�D��A�������D��G�D�E9$�����;D$0��!Mc�A��L�E9���!L9���1D�EA�D��A���v�H���?�H��@D#�I���A��t#L�U1�fDA���I����?��A	�9�u�A���H����D$8D9���YM���L��M9��:1L�|$8H�=A��H�5L�t$PH�D�t$0L�d$@�_f.�D��A�������D��G�L��A�<������	���D9��� Hc�A��H�E9��� I9���0D�ED��D��A���v�H���?L���@E#�H���A��t'L�UE1��A���I����?��A	�D9�u���@����|$8D9���XM���L��M9��"0H�
�t$0�H�A��H�D9�t*I9���/�U����v
��?�����t�DD�T$HE���2��$�M��M��M��I��H�l$�XH��M��L��L��SjUL�L$ �L$<H�T$0���H�� �����I�V�M9�����D�L$ E���LI��밋D$8D9���WM���L��M9��A/L�|$8H�=A��H�5L�d$@H�D�d$4L�t$PD�t$0�N@D��A�������D��G��D9�����D9���Mc�A��L�E9���I9���.D�EA�D��A���v�H���?�H��@D#�I���A��t&L�U1�f�A���I����?��A	�9�u�A���H���A�D$f����I�A�<$Tt�A���I���D$E�,$�A��A�W�I�G�����u�H�������t�I9��vI����0����VI�FP����0�������AA8D$�ƒ���8��$��E�l$M��I������QH�5A�D$9����Q�����B�<������	�8�u�����f���u�����fD��M��Ѓ���A�������M�Ń�L�T$ H��M��L��AVL��j�t$ L�L$ �L$<H�T$0�o��H�� ���e��;l$8�Y��M;�$�L�T$ ��A�UM�E�Ё���w���H�5��?�<H�5�#�D�O������fK�|
L��@H���F�����?��	�H9�u�O�l���������N�(�������M���M9���A�L�L��"��
������A��E9��3�M9���A�I�I����=���J=�t�v�-( ��v��J��M���M9���A�H�H�-�"��
�����A��E9����M9��sA�M�G��=��KM��=�����v�-( ��w�����M���M9���A�H�H�-�6�� t��+=�t=�����A��E9��+�M9���A�M�G��=��>JM��=t�v�=/ t��$+=_ t�=0t��1��M���M9���A�H�H�-���	���A��E9����M9��]A�M�G��=��hIM��=�����{*�� ����v�=�����=�u����I���I��I9��KZA�������I�NPI����p���D)��A����Z��I����L��B�:A9����M9�u�M��A��$�H9t$rh�#��M���M9�s@�A���x!I�NP�t���A���<���;I��M9�r�A9����M��A���L9|$����f�������A�D$x�������A��I���I��I9��KYA�������I�NPI����p���D)��A����Z��I����L��B�:A9����M9�u����M���M9���A�H�H�-�.M�ǃ�wI�FP������A��E9����M9��RA�M�G�Ё��v���?�4�v#T�D�^����t�K�|L��DH���F�����?��	�H9�u�O�|�{���M���M9���g�I�GL9�s1A�W����t�EZ�������0?H��L9�u�I��A9����M���7�M���M9���I�F,L���H�D$`A�v(����CIcV,L��H)�H��H9�wA�v@@83�WPH�CM��L9��GX�S����t��=fD�������=H��L9�u�A9��?�A���M��M���z��f.�L9l$������`��I���I��I9��
WA����A��I�NPI����/���D)��A������I������B�:A9����M9�u����M���M9��U����A���x&I�NP�t�����A���<��@8I��M9�r�A9��O�����M���M9���eA�L�L�A�I�o��=��O��
��O�b@��
�A@A��E9���OL9��CXI���M���Ic�I)�M9����I�A������������D�L$ A��E���n��~QI���I��I9��;BI�GE:�����I��)��fDM9��GI��E:W�����B�:9�}�;\$8�u#D�D$HE���V�|$ �D$8���1$9��0OM���M9��[ZE:�O��)�I�\L����L9���@D:�_CH��H9�u�L$@���.+M��M��M��I9������$��hH��M��L��H��Uj�t$ L�L$ �L$<H�T$0����H�� ������H��L9�s����@I�VH�����~]I���I��I9���DA�I�O:����I�ϹD)��$f�M9��I��A�G�:�U��B�99�}�;l$P���D�D$hE���lX�t$P9���LM���M9��pVA�:��L���)�I�lL���L9��;�:
��:H��H9�u�|$X����M��M��M��I9�������$�I��H�l$�XH��L��M��L��SI��jUL�L$ �L$<H�T$0���H�� ���x��M9�s��l��@���\��M���M9��P-D�\$XL��M��M���M��M��D�d$P�'��A�A9�� ��H���9���I9��F�H�h�с��vȃ�?H�5L�D�C�IC#�L���E�I����2"N�TH���H���F�����?��	�L9�u�J�D
����s���H���X����������M���M9���L���M��M��E��M���!f�L��9��E����9��I9����0L�X��v�H�
��?L�
D�C�@C#4�E�H����E��t�N�DL��DH���B�����?��	�L9�u�K�D�z����L�`E1�A������d��|$@D�XL�`�D$H���@�ډ\$0�D$4E���3�A���i���H�
Jc�H�>����Z��M����H�5�|$ ��I�σ�A9��$7M9��Q���I�OA���t܀��v׃�?���t̃�Hc�L�|��M���L�d$h�H�-L�t$`H�D�d$ L�D�t$0�E�M�׉Ѓ��������DUA��;D$4����D9�������A9��T<M9���<A�M�W��E��t����v���?H�5L��4�vA#�D�~�����w���E1�L�֐H���F�����?��	�D9�u�O�|:�R���M���L��L�t$`�H�-D�|$0L�d$hH�L�D�t$ �CDL�Љу��������TUA�T�;T$4����D9�������A9��x;I9���=�L�P��E��t����v���?H�5L�%D�C�@C#�E�`����E���t���N�D L��@H���F�����?��	�L9�u�K�D"�J���M���L��L�t$`�H�-D�|$4L�d$hH�L�D�t$ �KDL�Љу�H�5�������TUA�T�D9<�����;T$0������A9���9I9���:�L�P��E��t����v���?H�5L�%D�C�@C#�E�`����E���l���N�D L��@H���F�����?��	�L9�u�K�D"�B���M���L�d$h�H�-L�t$`H�D�d$ L�D�t$0�GM�׉Ѓ��������DUA�T��B�<������	���D9�������A9���7M9���9A�M�W��E��t����v���?H�5L��4�vA#�D�~�����m���E1�L���H���F�����?��	�D9�u�O�|:�B����E�,$I������|$@�v/�D$X�|$ �����6<����H���Hc�H�>��D9l$8�RM���M9��YE1҃|$@L��A��L�H��3��
������D9��sHc�A��H�D9l$8�^L9����E���=��<=���Bv�-( �������D9l$8�hQM���M9��lXE1҃|$@L��A��L�H��L�� ��;�)�=�t
1�=���D9���Hc�A��H�D9l$8��L9����E���=���;=�5;v�=/ �(;���=_ t�1�=0���D9l$8��PM���M9���WL��L�H������1��
�[-��
t+A�v|���5L�J���v���t��( ���LH�H�A��D9l$8�LL9�r�H���D9l$8��OM���L��M9���L�L��&I�FP��tzHc�A��H�D9l$8tiL9����U��Ё��vǃ�?A��@A#�H�����t$H�uE1�����H����?��	�D9�u������z����L$H�������$�M��M��M��I��H�l$�XH��M��L��L��SjUL�L$ �L$<H�T$0���H�� ������I�V�M9�����A�F���<�uH�����<�t�I���D9l$8��NM���L��M9���L�L��.I�FP���<���Hc�A��H�D9l$8�'���L9��o�U��Ё��v���?A��@A#�H�����t"H�uE1�D���H����?��	�D9�u������t������D9l$8�NM���L��M9���L�L��-I�NP���}���H�A��H�D9l$8�i���L9����U��с��v���?A�
�@A#�H�����t$H�uE1�����H����?��	�D9�u�G���w��r���f.�D9l$8�;MM���L��M9��(L�L��.I�FP�������Hc�A��H�D9l$8�����L9����U��Ё��v���?A��@A#�H�����tH�uE1����H����?��	�D9�u������y����;���D9l$8��LM���L��M9��?L�L��-I�NP�������H�A��H�D9l$8���L9���>�U��с��v���?A�
�@A#�H�����t$H�uE1�����H����?��	�D9�u�G���w��r���f.��D$8=�����II����Z����D$8=�����LD9���KM���I�F,L��H�D$@M9���RA�v(����6IcV,L��H)�H��H9�wA�v@@83��AH�CL��L9���I�S����t�4@������
4H��L9�u�A�E9D$8�����A���H9l$������������A�Fx��������*��f.�D9l$8��JM���L��M9��F=L�L��-I�NP���7���H�A��H�D9l$8�#���L9��	=�U��с��v���?A�
�@A#�H�����tH�uE1�����H����?��	�D9�u�G���w��x���f.�I����D$8H��D)�L)�9���<I�,���A�F���<�����H�����<�t���DM��A��$����U���H9L$����E���-( �������2���f�-( ���r������DA����������A���������������	�7�����f�=/ �%��vc=_ ���=0�
��鸿���=������=�����长��D=/ �����w+- ��
�����p���- ��
�����]���D=_ �M���=0�����=���DL�d$8L�t$@M�����fD��?H��4H�#��v�FH���Hc���H�t$8M�d5���I�U1����H����?��	�9�u��D$p�$tG��H�5�����H�=����H��G�|��9�tH��$`��D$p����M���Hct$pH��$`L�l$`L��$�L��L��H+D$8L�t$xH)�H�L$HI��I��H�\$@�A��L��$����.fDH�T$8H�t$`L�����uL|$8A��A9��<M9�sӅ��L9|$@�H�t$HL��L�������M��I���I��I9���1I�NPA�I�G�������I��)��I��A�G���k���B�:A9����M9�u�f�M��A��$����=���H9t$�z��-���DM��A��$�H9L$�R��
���DI�܋\$ ���P��H�D$(�I�H�$H�D$(DŽ$��)����L��$�L9|$L�d$x��M9���A��$���������������A�D$x��������&��fDL�|$8L�d$@L�t$P��M��M��A��$�L9l$����N���<�<�H���Hc�H�>��M���Ic�I)�M9������I���I��I9��a,L����PD)�A9��lL9��x���H���P���
�ϻ����
vҀ��t����M���M9���OL��I�^,�@IcV,I)�L9�wA�v@@8u��8H����D)���A9���8M���I9��sCA�v(��t�E1�H��L��H�����t��E���DI���I��I9��{/I�NPA�I�G������I��)��I��A�G�������B�:A9�����M9�u����I���I��I9���>I�NPA�I�G�������I��)��I��A�G�������B�:A9��<��M9�u��H�I���I��I9���.I�NPA�I�G��Y����I��)��I��A�G���:���B�:A9�����M9�u�����I���I��I9���=I�NPA�I�G�������I��)��I��A�G���۹��B�:A9��~��M9�u���I���I��I9��n=I�NPA�I�G�������I��)��I��A�G���|���B�:A9����M9�u��+�I���I��I9���)L����PD)�A9���L9�����H���P���
rۀ�
�������u�����I���I��I9��f)L����PD)�A9�|uL9������H���P��� t߀��tڀ�	�θ����I���I��I9��)L���	L9��C���H���P��� ���������������	������PD)�A9�}�I��A������%��M���M9���LA��I�W<w0<�&&<
�>�����D9�<L9��a>I��A�I�W<v�<
��%L9��8>A�
��J��A9�}�I��A�����fDA�Vh1������M���M)�A�i��������鿷�����鵷��<���H���Hc�H�>��D9l$8�9;I���I��I9���3A��P���v=��;�D$8��D)�I�tL��H��H9�tL9��L3��J���v��t�H����\$8D9���:I���I��I9��3A��P�����:=���:�؃�D)�I�tL���L9���2��J���v����t�H��H9�u��D9l$8�Q:I���I��I9���2A��ƒ��� t<	�(:�D$8��D)�I�tL��H��H9��1���L9��a2��у��� t݀�	t������\$8D9���9I���I��I9���GA��ƒ��� ��9<	��9�؃�D)�I�tL���L9�t��у��� �������	�����H��H9�u����D9l$8�e9M���L��M9�s=�E��
����
tA�V|��uk�P���v=�u\H��A��D9l$8tML9�r�A���H9l$s:��t6A�Fx��~)����I����D$8H��D)�L)�9�w�I�,�D$H������$�M��M��M��XI9������H��H��M��L��SH��j�t$ L�L$ �L$<H�T$0�Ѳ��H�� ��t��ƴ��D9l$8�f8M���M9��iF�D$8L��I�^,��D)�M�l�8IcV,I)�L9�wA�v@@8u�e1H��L9��E���M���I9����A�v(��t�E1�H��L��H�����t�������\$8D9���7I���I��I9���EI�NPA����7�؃�D)�I�tL���L9��w����������H��H9�u�����\$8D9��h7I���I��I9���/I�NPA���C7�؃�D)�I�tL���L9���/���C���H��H9�u��5����\$8D9��7I���I��I9��EI�NPA����6�؃�D)�I�tL���L9������������H��H9�u������\$8D9���6M���M9���DI�VPA���v6��L����D)�I�L��E������H��H9��x���L9�u��&����\$8D9��26I���I��I9��2DI�NPA���
6�؃�D)�I�tL���L9��������
���H��H9�u�����\$8D9���5I���I��I9���CI�NPA����5�؃�D)�I�tL���L9��q����������H��H9�u����f�I���H��tsA�T$f����f����P�y�|$89��A��guHA�n0M�V8��~<A�f����9��MEIcV4M��1��A�Ef����9��M/��I�9�u�A�D$f����L�����E��M��M��I�����M��M��D�\$XM��I������L9��s���A�W����t�a���f�������I���H��I9�u��;���I��M��A��$����=��騰��I��M���7�P����5<]�51������D$H��D$8����H���_I�FHA�I�WA�t$�80�@���E�l$I��I���Ϯ��H��H��H��$�H���DHc�$�L��$�H�����A���L9|$s��u1��ߴ��A�Fx���n���1��Ǵ��A�^ I�NHc�)�HcË\$����L��I+��9ӉD��BOÉD$�N��1�I9�t'M;��wI�G�I���A�G�I�VP�����M;���[���A�I�NP������<���IcV,L��H)�H��I9������A�^@A8�w���������A�FAA8G�_�������$�=���I9��ݲ������E�^`E���Ӯ��E�VlM���E���V���M9������A�v(���#M9������D�D$ I�N,L��L����������M���IcF,I)�M9��f����i���f��Cf�����A���A;V$�A�FXI���H��t	;E�3#E�,$���E�m鱬���D$89��B7M���L��M9���L�l$8L�A��L�d$HD�d$P�1fD��A�A9���Hc���H�A9��yI9��5�U��Ё��vƒ�?H�5A��@#�H�����t H�uE1����H����?��	�D9�u�����w��r���f�9��g5M���M��M9��
L�l$8L�A���'@9��CHc���I�A9��1M9���A���Ё��vʃ�?H�5A��@#�H�����t I�sE1����H����?��	�D9�u���H������L�|$8L�d$@L�t$PA���H9l$��������A�Fx���������fD=/ �=���w3- ��
�+���(����- ��
�����
���D=_ �����=0�������D��	�W���ګ��f.�I��DA�D$f����I�A�<$Tt��L���L�l$`L��$�L�t$x;l$P����D�\$hE���H9L��;l$P�<M���Hct$pL�l$`H��$`A��L�|$hL��L��H+D$8L�t$pH)�H�L$H��I��H�\$@L��D�|$PL�L$xL�d$PI���+H�T$8H�t$`H�����uH\$8A��E9���I9�sӅ���H9\$@��H�t$HL��H�������L��1҃�	�����- 1҃������I���L��L�d$8H�D$����I���I��H�D$�鑳��H��$�L�|$(�$H��$�H���L9�������$�5���IcF,L��Hc�H)�I9�����A�^@A8�����������A�^AA8_��������f�H�H����DŽ$�A�D$L��H��$�I�����$�I�FH��$�I���H)�H)�H��$���$�H�L$��$��t$H)���$�A�D$f������$�A�D$f������$��������$�A�����$�I���H��$��҅����������I�A�D$�^���L�$L���L����跤������A���M��M���ʨ��M;�$��L��鷨��������M���M9����L�-L���3fD�Ѓ���A����r���L�؃�9���I9����L�X�с��vƒ�?L�=E�D
C�@C#�E�x����E���N�D8L��f�H���F�����?��	�L9�u�K�D;�����<��N�p����ڧ���D$@�D$@�D$8�5���A��H�I����F�ƃ��t$P�4H��t$8�4������EƉD$H�����FH�I��"���,H����t$@�4������EƉD$8����D$ I��!�D$@��D$8���w�������M��L�d$@;\$8�O��D�L$HL�l$(M��M��M��E������$��hH��M��L��L��Uj�t$ L�L$ �L$<H�T$0�¤��H�� �������;\$8�����M;�$����A�}D�D$ M�u��E��t�����H�t$(����k�����M���w�����FH�I���Ã��\$H�H���D$8����������/�D$8���邿��L�|$(L9��
����$H������D$X�D$X�D$P騧��E�mI���������������M��M������L�l$8�t$@������$�M��M��M��XH��M��L��L��Sj�t$ L�L$ �L$<H�T$0L�\$@�u���H�� ���k���L�\$ I�S�M9��W���A�C���<�uH�����<�t�I���L�l$8L�d$HD�\$@E���5��$�M��M��M��XH��M��L��H��Sj�t$ L�L$ �L$<H�T$0���H�� ���٤��H�U�L9��ʤ���E���<�u�H�����<�t�H��뛋D$8����Hc�L�d$XA�L�$$H�D$@�L|$@A��D;l$8��M��L���L�����*�����u�A���M��M���A���M9�������/����M���M9������D$ �D$`H�=H�5L�d$pH�H�-��A���D$h�E�M�_D��A���v�|$h�M��D��A�������A�B�G�D��|������M9�r8������������G�D��|���Mc�M�M9�spA�A���E��t�=�v���?H�D�H�C�RB#�M�����E��tM�_1�E���I��A��?A��D	�9�u�A���f���fDL�d$pD;l$`������D���D$`�D$`D9���6M9������!��H���?D�H�C�RF#�A�Z��A��E�������M�|M���I��A�B�����?��A	�M9�u�M�|���I�t$I��c�R�W��$�M��M��Pj�t$ L�L$ M��L��L$<H�T$0����H�� ����f�M��M���8�A�T$I�L$����H��H�=��?�47��Hc�H9���+�F�M�d�
��H��I��H�AA8W������L9�u�D�i�*���H���?�4H��v#<�D�V�����%K�tL��fDH���B�����?��	�H9�u�O�|I�D$M���%'����[E�l$I��魟��H���?�4H�D#��vD�N�D�����A������$1�H��DH���B�����?��A	�9�u�N�|A��������L���>����H�
��?H�5D�C�@B#�E�H����E�������O�DH���H���V�����?��	�L9�u�J�|�k���H���?�4H�D#��vD�N�D�����A������#1�H���H���B�����?��A	�9�u�N�|A����ϣ��L��鬣���H�
��?H�5D�C�@B#�E�H����E��t)E1�H���H���F�����?��	�D9�u�J�|���������c���H�
��?H�5D�C�@B#�E�H����E�����O�DH��f�H���V�����?��	�L9�u�J�|黫��H�
��?H�5�<�#�D�O�����'���K�|L��@H���F�����?��	�H9�u�O�D���H���?�<H�D#��D�O�D�����A����t&K�|H��H���B�����?��A	�H9�u�J�tA����e����A����H���?�<H�D#��D�W�D�����A�������K�|H��fDH���B�����?��A	�H9�u�J�t龥��H���?�<H�D#��D�O�D�����A����t-K�|H���H���B�����?��A	�H9�u�J�tA���������q����H�
��?H�5D�C�@B#�E�H����E�������E1�H��H���V�����?��	�D9�u�J�|�{���H�
��?H�5�<�#�D�G�����"���K�|L��@H���F�����?��	�H9�u�O�\���H���?�4H�D#��vD�N�D�����A�����bK�tH��DH���B�����?��A	�H9�u�N�|A����N���L�����fDH�
��?H�5D�C�@B#�E�H����E������E1�H��H���V�����?��	�D9�u�J�|�ۡ��A�^`���۟���ޛ��fDI9��e���1��>
��H��U���M��I�VHA�L$���
8:�*���鞛��fDD��A����JHc�M�d<E9�t<D��H�5D�����H�=����H��GDD�E9��>���E�,$M���ҙ��A�v I�~Lc�)�Hc��4�B�4�L��I+��B�t��|$�r9�O��t$��A�t$f�����9t$~I�NHc�D��E���������e���E�n0�M�F8E���p�A�f����9��&/Icn4L��1���f����9��^��H�D9�u��0���A9�����������A9��B������L�|$hL�d$PL�t$pD�L$XE���$��$�M��M��M��hH��M��L��H��Uj�t$ L�L$ �L$<H�T$0��H�� �����L9��ܙ���C�H�S���<�uDH�����<�t�H���H�oL9������
��A��D9l$8����H���X��D9l$8�<&M���M9��T,�D$ H�L��L�|$PL�d$XH�=I��H�5��H�A���D$@f�D�EL�]D��A���v�|$@��L��D��A�������A�B�G�D�A�<��1I9�w?�f.������������G�D�A�<���Mc�L�I9�vx�EA���E��t�=�v���?H�D�H�C�RB#�M�����E��t'L�]1��E���I��A��?A��D	�9�u�A���^���fDL�|$PL�d$XA�E;D$8�bD�D$HE��������$�M��H�M��M���D$0H��M��L��H��D$8Pj�t$ L�L$ �L$<H�T$0蹕��H�� �������H�E�L9������H��L�L�
D�T$ H�=L�L�l$(L�5�0fD�ƒ���A�����CA�D��<���H���E��E��t��	�H���U�Ѓ�<�t���=�v���?A�4�vA#����t�H�uE1�����H����?��	�D9�u��m���DA��D9l$8�I9������L�|$PL�d$XA���H9l$��������������A�Fx�s�������fDL�l$(���H���?D�H�C�RF#�A�Z��A��E���.���H�lM��fDI��A�B�����?��A	�I9�u�I�l�����D$0E1�鳩��H�H�D$(�����H��D9������[���H�EL9������}
�iA��D9l$8���H������|$@E�\$�D$PA����D$8���I�����D$H�D$0A�D$��D$4������?A�
�@#�I�����t!H�wE1�@���H����?��	�D9�u�A�@���L������D9�����L9�����I�����H�����H��A���H9l$����������A�Fx�������Y���f�IcN,H�H��I9��������A�^@��Hc�A8��������v���A�FAA8D�n����a���f�H���
���H��A���H9l$���������A�Fx���ߖ���ś��DL�l$8A���L9\$�I����A�A�Fx���0�鋛��E�mI���|���I��M����I�������?H�5H��4�v#<�D�F����� �1�L���H���B�����?��	�9�u�O�t���L�l$8L�d$HA������,�H9l$�!�A�Fx�����֚��fDL�L$xL�t$pL�|$hL�d$PI9�A���@��H9\$��@���������������A�Fx����������E�mM���l���L�d$X�t$H9t$8������D$PM��M��M����
��$�Lc����D$ H��M��L��L��D$(Pj�t$ L�L$ �L$<H�T$0�g���H�� ���]����t$89t$H�M���L�$L���L����
�������M��D$8�<�� ���A�F|���������������A�F|����������=�t����-( ��v��ԑ��L�d$hL�t$`A��u���D�\$ E���
�t$89���!I���L9���$A�EH�ƒ�H����A�����!���)�I�tL���#H9����H�у�H����A�
��s	H��H9�u�H��I9��,�����$��hH��M��L��H��Uj�t$ L�L$ �L$<H�T$0���H�� ����H��L9�s����I��L�d$hL�t$`A�����H��A���H9\$�3������+���A�Fx�������6���fDH��A������Y���H9\$�N���A�Fx���=������DD�D$ H�L$`L��H������;���M����X���L�d$hL�t$`A��У��I��L�d$hL�t$`A�鸣��L�d$`I��A��$����z������L�d$`A��$�L9|$�
����ȏ�������L�d$`M��A��$����1���霏��M��L���,��L��H���?�H��RD#�H���A���t%I�T$E1�fD���H����?��A	�D9�u��Hc�����;\$8��M9�$��)�t$ M���D$0�:fD�H�t$(�����Hc��I�;\$8trM9�$���A�>�����v��|$0t���?H�5�H��R#<�H������tI�V1����H����?��	�9�u���p���M�$��XH��M��L��L��Sj�t$ L�L$ �L$<H�T$0�?���H�� ���5���I�F�M9��&����L$ ��uI���A�V�����u�@H�������t���L�d$`I��A��$�L9l$�����׍��H���N���H��颼��H��A��D9l$8�����!��?A�4�vA#��^�����0���1�I��A�W�����?��	�9�u�L�|�
�����?�4�v#D�D�^�����z���K�|L��f�H���V�����?��	�H9�u�O�|�K�����?�4�v#D�D�^���������K�|L��f�H���V�����?��	�H9�u�O�|�u�����?�4�v#D�D�^�����Ĵ��K�|L��f�H���V�����?��	�H9�u�O�|镴��L�|$8L�d$@L�t$PA������	���H9l$�����������y��Hc�I��I��ي����?A��R#�H������t%H�uE1�����H����?��	�D9�u�������?A��R#�H������tH�uE1�f����H����?��	�D9�u���!��M��L���Z��M��L���w��IcF,L��Hc�H)�I9������A�^@A8�x���������A�^AA8_�`�������D�D$ I�N,L��L������4����7����D�D$ H�L$@L��H������M������E�,$I��雉��H�����H�EHcU I�~H�uI���H����E$L�e�D$H�D$(E�,$H�$�T���L�|$PL�d$X��;l$8��M���M��M9��;L��)�у���A�
��s~H���I�;l$8tpM9��	A���с��vă�?H���@A#�H�����t I�vE1����H����?��	�D9�u�G����u�����Nu���$��XH��M��L��L��Sj�t$ L�L$ �L$<H�T$0轇��H�� �������I�V�M9������A�F���<�u�H�����<�t�I���E��D�l$(�\$8M��M��M��A����U	A���J���H�Jc�H�>��$�L�5�hH��M��L��L��Uj�t$ L�L$ �L$<H�T$0����H�� ������;\$(�����M;�$�����A�EI�u��=�����H�
��������H��BH��D�;D$4����;D$0������D$(I���O�����$�L�5�hH��M��L��L��Uj�t$ L�L$ �L$<H�T$0�I���H�� ���?���;\$(�3���M;�$�����A�EI�u��=�����H�
���|$4������H��BH��T�H�9<�����;D$0�‡���D$(I���E�����$�L�5�hH��M��L��L��Uj�t$ L�L$ �L$<H�T$0�}���H�� ���s���;\$(�g���M;�$�����A�EI�u��=��8��H�
��������H��BH��T��B�<������	���;D$0������D$(I���F�����$�L�5�hH��M��L��L��Uj�t$ L�L$ �L$<H�T$0貄��H�� �������;\$(�����M;�$��(��A�EI�U<���|$0�r����D$(I��돋�$�L�5�hH��M��L��L��Uj�t$ L�L$ �L$<H�T$0�0���H�� ���&���;\$(����M;�$��O��A�EI�u��=�����H�
��������H��BH���;D$4����;D$0������D$(I���P���H�
��?H�=D�C�@B#4����E��tI�E1����H����?��	�D9�u���F���1��P���H�������鶼����?H��4H��vD#�H��ɍv�A�����M�\7L��H���B�����?��A	�L9�u�M�L1�]�M��Lc��t$H9t$8|�_f��D$8M�D$89D$HtBL�$L��L����L�T$ �v���L�T$ ��u�A��$���tM9�$�wL9T$�:M9��s�����$�Hc�XH��M��L��L��Sj�t$ L�L$ �L$<H�T$0L�T$@�:���H�� ���0���L�T$ I)�M9�s�����A���������H9l$������]��L��靧��L��A���������H9l$�����y��H�
��?H�=D�C�@B#4����E��tH��1����H����?��	�9�u���S����(������w���A�FA8C������d���L��邳����?A�4�vA#��^�����ݰ��1�H��H���V�����?��	�9�u�H�l鸰��L���ƅ��������A�FA8E��������M��A��$����r�L9t$�g�A�D$x���U��`����������A�FA8E���餂��L������L9����}
�
A��E9��I���I��A���&���M��M��L9|$�i�D$M��$�A��$�H�D$I��$��w���L������A�D$f����I�D�8Tu�Pf����HЀ8Tt�H�p���M9�scH��I�EM��H�T$HH�D$P�$L�L$XL�T$`A�f����9D$8�w��M9�s)LL$HH�|$P�L$@I�qL�T$`L�L$X��L$@��t���9��J��IcV4I�EH�D$@M�l�-A�E�H�T$H�L$Pf����9D$8�����I�9��	��H�|$@L��L$PH�T$H���t�����I����I9�stH��H�BH�l$PH��H�D$X�0�EL�D$8f�����9D$~I�vH�D��E������I9�s.Hl$PH�|$X�L$HH�uH�T$@L�D$8�H�T$@�L$H��t��iD9��]��IcN4H�BH�D$8H�T
�Ff.�H�T$@H�L$H�B�f�����9D$~I�vH��<������H�D9����H�|$8H��H�L$HH�T$@���t�������tA�FA8C����H�������$����|$@�D$0�Z�|$ H�-��H�-H��M��L��L��D$8Pj�t$ L�L$ �L$<H�T$0�}��H�� ������;\$(����I��$�I9������|$@���t$XI�MA�E��@���U���@��Hct�H�>��M9�$�vA�}
uI���D$(H��M��L��L��D$8Pj�t$ L�L$ �L$<H�T$0�}��H�� �����;\$(��~��I��$�I9��~���|$@�'A�EI�U��=����t$X�N��������H�5��Hc�H�>��D$(M��H��M��L��L��D$8Pj�t$ L�L$ �L$<H�T$0�k|��H�� ���a~��;\$(�U~��I��$�I9�����A�ED�t$ M�]��E��t=�����L�����L�5L�
A�H�=���A�FA�D��<�u2��}���ƒ���A����A�FA�D��<�����Hc�I�L9�����A��t$PD�l$ �‰�E��t�=�v�H�5��?�H�5�R#�I������tI�sE1����H����?��	�D9�u�A�M�a���<
�9}��<
v<��-}���D$(I���I���<
r�<
�}��<�u��}��< t�<�t�<	��|����< ��|��<���|��<	u���|��<��<�W<
t���|��I�T$P�u��|��I�T$P��{����|��I�T$P��g����|��I�T$P��S����{|��I�T$P��?����g|��I�T$P��+����S|��L���'��M�����M��L������I���L��I���t����$��|$ M��D��M��M��D�p�Z
H�-H��M��L��L��AVj�t$ L�L$ �L$<H�T$0��y��H�� ����{��9\$8��{��M;�$�����A�EI�U��=���I�L$H���9D$P��{��I�Ճ��I�����M��M��������$�M��D�T$0M��M���|$ �D$(�{L�5H��M��L��L��D$0Pj�t$ L�L$ �L$<H�T$0�y��H�� ���
{��9\$8��z��M;�$������A�EI�u��=���I��9���z�����H��鷴��A��$�H9\$������{�A�D$x���i��0�����$��D$ M��M��M��hH�H�D$(A�H�\$(ASU�4�D$0L�P�$��A��$����N�L9t$�C�A�D$x���1�麁��L������A9��
����M���r���E9��t���M��A��$�H9l$������y��A9�����M��A��$�����y��H9T$������y��L�����;a��$�E�A�ҍhA��A��W��AUM��H��L��UAR�t$ L�L$H�L$<H�T$0�[w��H�� ���Qy��I���e���A��M����$�M��E)�M��hE��H��M��L��L��Uj�t$ L�L$ �L$<H�T$0�w��H�� ����x��C�.9D$P��x��M;�$��u���I��A�E�9�t���x��A�t$(���kIcL$,H)�H��I9�wA�|$@A8}����@����x��I�M�Z���D9���I���I��I9���L��H�BL9����R����t�f.������uH��L9�u�A�E9D$8�����_���A��D9l$8�{L9��H���H���,���M��A��$�����w��H9l$������w��M���	��A��M����$�M��E)�M��hE��H��M��L��L��Uj�t$ L�L$ �L$<H�T$0�u��H�� ���|w��C�.9D$P�lw��M;�$������I��A�E�I�T$H:t��Fw��A��$����l���1w���L���^��|$HL�l$(M��M���D$8���M��������L���i�A�~p��v��A�~t���Ic��I��H9D$�h���v��L�����L��駰��A�t$(���mIcL$,H)�H��I9�wA�|$@A8}�r
��@���yv��A�EI����=��(�����?H�5�|
k�#��w���t$M�D5L��H���W�����?��	�L9�u�M�l5�t$X�V�������H�
��Hc�H�>��L9|$M����M9�A��$��„�������u��M���L��I�^,L9�����A�v(��ulIcV,L��H)�H��H9�wA�v@@8utsH�EL9�sA�U����t�3�����u%H��L9�u�L��A��A�����u��L����H����D�D$ H��L��H������%���M���덃�����A�FA8E�v�������I��=��������
���������t��I��=��t���I�� ��t�����	�����t��I��=���t��I�T$P����[����t��I��=��H���I�T$P����7����mt��I��=��_t��I�T$P��������It��I��=�����I�T$P�������%t��I�Ճ�
���w3��
����t��A�|$|����s��-( �������s��=�t���s��-( ��v���s��I��=���s��I�T$P����n��s��=���s��=��S��s��=/ �~s����- ��
�/��es��I��=��Ws���J��
���
��@s��L���(��D�D$ I�L$,L�����@�������?H�=E�Ak�B#�A�x���E������M�L=I��I��A�P�����?��	�M9�u�L�l>��D)�H��M��L��L��D$0Pj�t$ L�L$ �L$<H�T$0�p��H�� ����r��B�+9D$8��r��M;�$������I���D$0A:E�u��ar����?A��������H�H�T���M���^�����$�L�l$ A��M��Hc\$pM��M��h�"H��H�t$ L���D�T$(��ugLl$8A��D�T$(M��L��L��ARUj�t$ L�L$ �L$<H�T$0��o��H�� ����q��D�T$(D9T$P��q��M��$�H�t$8L��H)�I9�v��|$pD�T$(��L��H)�I9���H��$`H��L�������I�D�T$(�W���E1�I�L$,L�����@�������?H�=E�Ak�B#�A�x���E���?�M�L=I��I��A�P�����?��	�M9�u�H�t>��I��=���wO�� ���v6=����=��z��p��=_ ��p��=0�_��p����	�Q��p��=/ �A���=_ �0�=0�%��[p����?H�=E�Ak�B#�A�x���E���&�M�L=I��I��A�P�����?��	�M9�u�H�t>����?H�=E�Ak�B#�A�x���E�����I��I��A�P�����?��	Ѕ�u�H�t>�~�- ��
�x��o��L���z��L�����A��D;l$8�����H���|���H���S���H���-���H�4$H�t$(A��uH���KH��H	�H�L$(<W��PM��L��L��Uj�t$ L�L$H�L$<H�T$0L�T$ �#m��H�� ���o��L�cL�$M���&�����?H�=D�D
Ak�B#�A�x���E��t%M�L=I��I��A�p�����?��	�M9�u�H�T:I��=��,���D)�H��M��L��L��AVj�t$ L�L$ �L$<H�T$0�l��H�� ���wn��B�+9D$8�gn��M;�$������I��A�E�I�T$H@:,u��@n��L���̸��H�5��?�<H�5k�#��w������L��H���W�����?��	Ѕ�u�M�\3���<
uhH9����A�}
���I�M��D�$I�t$M��L��PUj�t$ L�L$H�L$<H�T$0�k��H�� ����m��D�$E����I��頰��<��ym��A�|$|�@��hm����?H�=E�Ak�B#�A�x���E���L�M�L=I��I��A�P�����?��	�M9�u�H�t>�"�L���b��I�W�ɴ��-( �������l��M��M��M���}�M��L����y��A���������L9l$����L����M��L����M��AVH��L��UM��AR�t$ L�L$H�L$<H�T$0�j��H�� �l��PH�sM��L��Uj�t$ L�L$H�L$<H�T$0L�T$ �Sj��H�� ���4����Dl��A�D$x�������s��L�d$pA������M��1��}���t1�A�D$AA8E@���t����������k����`���H���#���M�����M��L����M��L������t81�A�D$AA8E@���	�M9�A��$���L9l$�„��Ǖ���k������L��1����=��(�I�T$P������Mk��I���
�I�����I�����I������AWAVAUATUSH��D��H���H��$ �L$8H�D$(dH�%(H��$�1Ɂ�oZ�|$��H���zI��H���nD��$(H����E���� ˆT$S�N��$(�����S�{E��HDŽ$����HDŽ$���L�K H���$ H��$(�CHDŽ$���$$H�D$H��tMH���t	L�VL�T$��tL�VL��$���tL�V(L��$��tL�VL��$���tL�N M��H�
I��LDɁ;ERCP�d��A�OE�_L��$xD��$�DŽ$hH�L��$8HDŽ$�H‰�H��D���H��$pHc\$8��$\��L�%�D$��������H��$��D$d��$T�������$XD�������$LD�������$PD���
����$`D�����A����$d�E�I��@H��$@D��%���$�=���=�������D$��H��$�dH3%(�D$��H���[]A\A]A^A_�fDL�t$H��$�H��D�D$H��$�H��L�L$L���I��H���(M���P�xID��@D�D$L�L$H�l$�<�����������EƉ�$l��D��%p��pE�= �g
��=@��	=P��	=0����H��
H��$f��$0��tA�G��
Ic�H�D$pI�l�D$��t
A�� ��A�GA��ƃ�H�t$0�VUUU��$(��$(��)�D�Rf����D����D����)�9���D�DL�L$@Ic�D�\$<�L$ H��D�D$�D�D$�L$ H��H��D�\$<L�L$@H��$��C������D��$DŽ$H��A�GDŽ$������D�ꉔ$9�$(}D���D$T�o�C�4�VUUUH�|$(D��$��DŽ$H��A�GH��$DŽ$������D�D$T)�9�$(H����$AL�H��t7��Mc���J����Hc�H��H�|�H��H9�r@�����H��H9�v��A��A�WD	�fD�\$j���|$@u,���f�|$juH�|$H��t�G�[	��D$lA�����H�D$x���GA�@��A�ӉT$`����A�Ӄ���$�A����$�H�A��H�}�E�ց�H��D�d$<L��$�L��$�I��H�|$X�L$H�D$HH�D$ H���M�D$L������H��$��D$H��H�����H9�r5�cHc�$H��H)�H��H9�w��$0@8;�IH��H9�v0��$��t�E1�I�L$,H�����uH��$�H��H9�wЋD$<���A����2�D$l���ZH9��=H��$8�E�A9�u�#�E�D9��H��H9�u�����n	=�+���DŽ$�
DŽ$��$0�����H��H9��L�����$��uaHc�$H��H)�H��H9�w��$0@83�H�CI9�v��K����t������u�H��I9�u�L����H��I�L$,A�H���������H��$��f�|$j��H�L$xH���YfD��$�����H�t$H��t�FtL��H)�9F(�n�L$`���tqL��H)�H=�c1�A�����H�H9D$XsP��$�����I9��,�H�pH�t$X9�t"H�D$X�t$`�H��I9���H�P9�u�H�D$X�H��M��H��H��$�H��H��$�HDŽ$�jjjL�L$PH��$��_��H�� D��$hE��tH�|$ ��=�������=���ue�L$T��tH��$��|$S����H�D$ �D$����H�������$(�D$�����H�t$(�L$8L)��N���f�=���t,������D$�D$S�z����=���t��u�D�T$H�]E����D�L$E��tA��$H��$�����Hc�$H)�H9�w��$0@8u���D�D$@E���vI9��m�{�
u	I9���H��$H��H�������H�D$HH�H9������DH���F�����H9�w������������$18C������f��������$18C���������DŽ$��������$��|$`����H��$�����DŽ$l���DŽ$l����H9�����EA9�u����D�ED9�����H��H9�u����E�GA��fA���/A���D$lH�D$x���H9������D�D$dI�L$,H������|����D$T��������~���H��$xH�D$pH�H9��:����D$����H9��%�����$����H9��EH�EH9�v/�U����t�������	H��H9�u�H��H�ŀ}�
������$���������H9������1��}
��H����D�t$8L��D�D$<L�L$0D��D�\$ �L$����`D�D$<�L$D�\$ L�L$0E������E9������E���������<�������D$�����i���@DŽ$�d���H��$�H�L$ �	���DŽ$�B����;
�#���H��$��@ ������$����v��$��H�����I9��q����E��<�t�a���@���<��O���H��I9�u�L���>���L��$8�D$lH�D$xA�<8A�������|$T��t:��$(H��$����$(9�$�~DŽ$HH����$H1���u��$��������$(�#�H��$xH��$��D$H�t$(H)ʉH��$�H)ʉV���H9�������EH�ƒ�H�����H�L$x��s+�����EH�ƒ�H��������Z���H��H9�u��L���H���D$lA�����H�|$x���H9�r)�&���fDH9��oH��H9��
���H��$x��$��u�Hc�$H�H9�rЉ���$0��Hc�@8tu���������$18Du����I9��H���H�H�H�L$X9�$��5���9T$`�+���H�D$X�t$`��$��9�����H��I9�������H�P9�u�����������$18E�Q�������@Hc�$H�H9���������$0��Hc�@8t��������������$18D�������f.�H��$xH�����I�L$,A�H��������������E1�I�L$,H������y������DŽ$�
����D$������H�D$(H�sH�x��$(�P�Hc�H��������D$�����y��D$��l��D$�����_��D$��R��D$�����E��D$�����8��
GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864GCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�zRx�l#Z�E�B �B(�A0�A8�J0A(B BBBH������H8������A
0C(B BBBD��ЖB�E�E �B(�D0�A8�G�d�I�D�D�W�z
8A0A(B BBBF��L�B�G�W���J�F�A�W�$�K�H�B�N�}�K�H�B�N���V�B�B�W��Y�B�D�W���a�H�D�W���J�B�D�W���L�E�A�S�B�a�H�D�W�d�a�H�D�W�w�U�B�D�W�[�a�H�D�W���S�A�D�W�c�H�E�D�W�6�J�B�A�W�t�H�E�D�W�r�J�B�D�W�,�J�F�A�W��	�J�B�A�W���J�F�D�W��
�J�B�D�W�!�J�B�D�\�k�J�B�D�W���Q�B�D�]���J�B�D�W�
�N�B�D�W�'�N�B�D�W�E�J�B�D�W���J�B�D�W�[�J�B�D�W���J�B�D�W���J�B�D�W���J�B�D�W���J�B�D�W�[�J�B�D�W���J�B�D�\�c�N�B�D�W�v�N�B�D�W�u�N�B�D�W�l�K�B�D�W���N�B�D�W��
�A�L�E��J�B�D�W�q�J�B�D�W�V�J�B�D�W���N�B�D�W���A�B�D�W�}�J�B�D�\�z�K�B�D�W���A�B�D�W���G�E�D�W�F�K�B�D�\�d\ZF�B�B �B(�A0�A8�M��
8A0A(B BBBG4�l�B�B�V�Z�/He��#�0�������	/ARcu���������&->�ZH]o.annobin_pcre_exec.c.annobin_pcre_exec.c_end.annobin_pcre_exec.c.hot.annobin_pcre_exec.c_end.hot.annobin_pcre_exec.c.unlikely.annobin_pcre_exec.c_end.unlikelymatch_refmatch.constprop.0rep_minrep_max.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..group_pcre_utf8_table4_pcre_utf8_table3_pcre_ucd_stage2_pcre_ucd_stage1_pcre_ucd_records_pcre_OP_lengths_GLOBAL_OFFSET_TABLE__pcre_is_newlinememmove_pcre_was_newline_pcre_ucp_gentype_pcre_xclassmemcpypcre_freepcre_callout_pcre_ord2utf8memcmppcre_mallocstrcmp__stack_chk_failpcre_exec_pcre_default_tables_pcre_try_flipped_pcre_valid_utf8Y"��������`#��������g$��������n%��������u&������������������'���������'��������y)���������	*���������
��%���������$���������&�������������'jf���+��������
)��������k%��������x$��������,���������&���������"���������#��������~���g'h�'g�-��������"��������(#���������'i�.�������� '��������| .��������� /��������##0��������Z$$��������d$%��������p$&���������$"���������$#��������c%,��������r%$��������~%%���������%&��������&"��������&#���������&$���������&%���������&&��������'"��������&'#���������'"��������y($���������(%���������(&��������)"��������)#��������%*,��������+"��������+#���������+"���������+#���������+"��������,#��������{,"���������,#��������-"��������-#��������/"��������/#���������1"���������1#��������>5"��������E5#��������6"��������6#���������60�6"��������E7$��������Q7%��������]7&���������7"���������7#��������88$��������I8%��������P8&���������8"���������8#��������09$��������A9%��������H9&��������c9,���������9"���������9#��������-:$��������9:%��������E:&���������:"���������:#��������R;D�;"���������;#��������0<"��������7<#���������<"���������<#���������="���������=#���������>"���������>#���������?"���������?#��������Q@"��������X@#��������A"��������A#���������B"���������B#��������E"��������!E#��������xE%���������E$���������E&���������E1��������.F2��������lF2���������G��H)���������L��O)��������T3���������U)��������GV"���������V#��������W"��������\W#��������&Y2��������dY2���������Y/��������CZ0��������$['O�["���������[#��������r\��\��\��\��]-���������]��]
�'`$��������.`%��������:`&��������A`,���������`"��������a#���������a"���������a#��������Ub"���������b"���������b#��������'c"��������5c#���������c"���������c#���������c"��������d#��������kd"��������ud#���������d"���������d#��������4e"��������>e#���������e"���������e#���������e"��������f#��������Uf"��������cf#���������f"���������f#��������g"��������&g#��������tg"���������g#���������g"���������g#���������h%���������h$���������h&���������j,���������j$���������j%���������j&���������k"���������k#��������;l$���������l%���������l&���������l,���������l"���������l#���������m"���������m#��������6n'��������np"��������up#���������s)���������t"���������t#��������3u-��������wu"���������u#���������x)��������y)��������[y.���������y#���������y"���������z�z"��������d{%��������z{$���������{&���������{"��������&|%��������@|$��������K|&��������W|,���������|"���������|%��������}$��������}&��������P}"���������}"��������?~%��������U~$��������`~&���������~"���������~#���������~"��������#��������`�"��������j�#����������4����������4��������u�4����������4��������H�"��������U���X�%���������$��������%�&��������1�,����������"����������#��������-�"���������"����������'���������5��������Ӎ#��������#���)���������)���������#��������$�2��������ƒ2���������)���������#��������ۓ#��������+�#��������/�#����������"��������	�#��������Ζ#��������E�7���������8��������=�3��������ǟ)���������)��������}�/��������ؤ)��������ϥ9��������;�/����������+����������+����������.��������V�5�������� Z�  �	|	Hl	���\ $�(�,L0P4�8�<�@�D$HL|P�TD
X�\4` dh�l,p�t4x�|T�X�\�`�d�h�T�X��� ����������� �$�������|�X�4��\�`�d�h�l�p����PL��@%D% H%$L%(P%,T%0X%4\%8�"<�"@hD"Hx Ll$P�T�X�\�`Hd�
hPl�
p(tTx(||��4�����%��%��%��%��%�����t��� �����P�T����.��.�.��-�L1��0�$0��/�"2�J2�N2��1�/-�,,�+`*8*&*�)�* p'$\&(H%,>$0a(4�68:<!9@-8DA7H�BLu=P�@T<@Xz?\�>`�Ad�Ah�ClJ2pN2t=x3<|7<��;��;�
J��I�WI��H��F��J��H�H�H�J2�N2��K��K�OK�K�\H�8Q��P�pP�P�R��Q��O�O�O�J2�N2��N�$N��MEM�LF}|�{�z�}�� ��$��(Շ,Ň0}�4�8�<#�@J2DN2H|�Lg�PY�TH�X+�\��`l�d��h��l��p׏t��x��|��J2�N2�7��z��~����a��˜�o�����������ڏ�
������J2�N2�:��}�������d� �0`�.symtab.strtab.shstrtab.group.rela.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.rela.rodata.comment.note.gnu.property.note.GNU-stack.rela.eh_frame@P`h'pZ�"@���-ʪ3ʪ=̪T8@0�0	S �b �(]@`�0|H��H�(�@��0�p���@��(�0X�-|��S�����P�ذ�ذ��@��H��p"	��0��pcre_fullinfo.o/1575493209  1667  135   100644  5392      `
ELF>�
@@
��AVAUATUSH��pdH�%(H�D$h1�H���'I��H���H��A��1�H��t�ua�?ERCPue�����A��w'H�
D��Hc�H�>��f��Gf����A�E1�H�t$hdH34%(��H��p[]A\A]A^�fD�?ERCPH�^t�L�t$0H��H��L���H��H����H��IE��p���������H��t��Ct��C(��G%z�I�ED���u���@�GI�E1��c���f��GA�E1��R�����GA�E1��B�����G����G�&���H��t�C1ۨtH�]H��I�]1������������G����G����1�H��t��DH�I�E1����fD�GA�E1����f��GH�1�I�}���fD�G�Ѓ�A�E1��u���@�Gf����A�E1��[���f.��GA�E1��B������������+���������'�������������
GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864GCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�zRx�DiF�B�B �A(�A0�D��
0A(A BBBGi7Tu�����		i-?T.annobin_pcre_fullinfo.c.annobin_pcre_fullinfo.c_end.annobin_pcre_fullinfo.c.hot.annobin_pcre_fullinfo.c_end.hot.annobin_pcre_fullinfo.c.unlikely.annobin_pcre_fullinfo.c_end.unlikely.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..grouppcre_fullinfo_GLOBAL_OFFSET_TABLE__pcre_try_flipped_pcre_default_tables__stack_chk_fail]
��������� ���������!��������e"�������� i  �(<Pl�L �$(�,�0(4D8�< .symtab.strtab.shstrtab.group.rela.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.rela.rodata.comment.note.gnu.property.note.GNU-stack.rela.eh_frame@P`
h'pi"@P
`-�3�=�T8@�
0	S0b0(]@�
0|X�X(�@0��@�@@��0�-|�S���P�@�@`�@��H	�e��pcre_get.o/     1575493209  1667  135   100644  7464      `
ELF>h@@
��AW�AVAUI��1�ATUSH��H��8dH�%(H�D$(1�H�L$��Ņ�u&�D$����H�L$�1�H����Ņ�t+H�T$(dH3%(����H��8[]A\A]A^A_�fDH�L$ �	1�H����Ņ�u��D$L�|$ D�t$�D$D9�}2F�d5�\$L��A�A��Hc�L�H�s���t)~A�l$D9�|ν�����h���D�d$E���fD�+�C����D����f.���AWAVAUATUH��1�SH��H��HH�T$�H�L$H�L$,dH�%(H�D$81��A�ą�u'�D$,����H�L$(�1�H���A�ą�t/H�|$8dH3<%(D���?H��H[]A\A]A^A_�f�H�L$0�	1�H���A�ą�u�D�|$,�D$(L�l$0D�|$�D$E9�}1C�<�\$H�����A��Hc�L�H�s���t+~E�fE9�|�A������^���DD�t$,E���fDI9���HcD$�I��H��H)�H��I��I���	�M�M9�sK�4<H�����t�D$�L$����H�L�I��H9�v%Hc�I��L�h�	�L�I9�vJ�4+H�����t�H�D$L� H�D$D�d$H����I����@SH��H�� dH�%(H�D$1��G
u
�G��H�L$H�T$���~oH�T$H�t$D�D�JH9�wFA��A������<	Hc��<���yZHc��fD��J��ȍHcɋ���yH�H9�v�A����A��A��D�H�\$dH3%(uH�� [����������USH����xR9�}N�Hcɋ��\�)�D9�}+Hc�Hc�H�H��L����(H����[]�f.��������f��������f���AVAUM��ATA��UH��L��SH��D�t$0�����~'E��M��D��H��H��[��]A\A]A^�I���f�[]A\A]A^����AWI��AVAUATUHc�S�\-H��H�������I��I��H����L�L�DD�@H��B�|	+x�I9�u�Hc���I��H��tzM�7I�L�I��I�l�DIcu�A�]H��I��I��)�L�Hc�H���H�I�F�H�K�I9�u�H�E1�H��[]A\A]A^A_Ð���H��H��tI�/�Ҹ������D���%fD��AVAUATUS��xX9�}T�I��L��Hc�L�4��\�A+�{Hc��H��t4Ic6Lc�H��L��L��B�(H�E��[]A\A]A^�D������������f.���AUM��ATA��UH��L��SH��H�������~"H��M��D��H��H��[��]A\A]�=���DH��[]A\A]�f.����%
GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864GCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�zRx�H&F�G�B �G(�A0�A8�Gp`
8A0A(B BBBGHh�F�B�B �B(�A0�F8�G�r
8A0A(B BBBJ ��A�G0�
AA(�gE�A�D w
CAKHYF�B�E �D(�G0�a
(C BBBNA(A BBBHP�F�E�B �B(�A0�D8�H@�
8A0A(B BBBB�
<�vF�B�B �A(�A0�O
(A BBBFH�SF�E�D �G(�G0M
(M CBBJD(A ABB<
�-Ea~������	�&%3:K0�h�g|�@Y������
���v� S�
.annobin_pcre_get.c.annobin_pcre_get.c_end.annobin_pcre_get.c.hot.annobin_pcre_get.c_end.hot.annobin_pcre_get.c.unlikely.annobin_pcre_get.c_end.unlikelyget_first_set.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..grouppcre_get_stringnumber_GLOBAL_OFFSET_TABLE_pcre_fullinfostrcmp__stack_chk_failpcre_get_stringtable_entriespcre_copy_substringmemcpypcre_copy_named_substringpcre_get_substring_listpcre_mallocpcre_free_substring_listpcre_freepcre_get_substringpcre_get_named_substringpcre_free_substring5 ��������[ ��������� ���������!��������""��������o ��������� ��������� ��������%!���������!���������!���������"��������3#������������������"��������%���������(��������?%���������*���������(���������%���������*�������� �   l0���@T������ @�.symtab.strtab.shstrtab.group.rela.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.comment.note.gnu.property.note.GNU-stack.rela.eh_frame@P`
h'p�"@�-�3�=�T8@�0	SPbP(]@(0|x�x(�@X0�0�-|�S���P� 	� 	P�@��pP	�$x�pcre_globals.o/ 1575493209  1667  135   100644  4024      `
ELF>8	@@


GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864GCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�
5
Q

q
�
����7 A2<.annobin_pcre_globals.c.annobin_pcre_globals.c_end.annobin_pcre_globals.c.hot.annobin_pcre_globals.c_end.hot.annobin_pcre_globals.c.unlikely.annobin_pcre_globals.c_end.unlikely.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..grouppcre_calloutpcre_stack_freepcre_stack_mallocpcre_freepcre_malloc   !!.symtab.strtab.shstrtab.group.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.rela.data.rel.comment.note.gnu.property.note.GNU-stack@
P`h
"p(p.p8pT3@`0N�]�(X@�0w���(�@�0� �@�`�08-weNe�hP���`	HP�pcre_info.o/    1575493209  1667  135   100644  4192      `
ELF>�	@@
��USH��HdH�%(H�D$81�H��t|�?ERCPH��H��uSH��t�G%z��EH��t�G�t&�G��GH�L$8dH3%(u>H��H[]�D���������ѐH��1�1��H��H��u������빸������
GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864GCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�zRx�,�E�A�D`^
AAF�/He�����	���#.annobin_pcre_info.c.annobin_pcre_info.c_end.annobin_pcre_info.c.hot.annobin_pcre_info.c_end.hot.annobin_pcre_info.c.unlikely.annobin_pcre_info.c_end.unlikely.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..grouppcre_info_GLOBAL_OFFSET_TABLE__pcre_try_flipped__stack_chk_fail���������� �������� �   .symtab.strtab.shstrtab.group.rela.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.comment.note.gnu.property.note.GNU-stack.rela.eh_frame@
P`h
'p�"@�0-3=T8@0	Slbl(]@80|���(�@h0�0�-|�S���P�@�@H�@��	�4��/0              1575493209  1667  135   100644  5240      `
ELF>x
@@
��AV�@AUATUS�H��H��tq�I��1��I�$���TH��H=u��1�I���%D�H�����H��H��t$I�E�DXu�I�$����[H��]A\A]A^�L��1�H��HDžH��L��A�HDž8H)���@���H�1�I�E��I�EB��� t���D�����H���AI�EB���t!���D������� ��H�AI�EB�f��y#���D��������H�AI�EB���@t#���D��������H�AI�EB���t#���D�������H�AI�EB���t���D����� ��H�AI�EH��H�����x��L�?��t ����D�������@��H�AI�E�x��t!���D�������`��H�AI�EB���t!���D���������H�AI�EB��t���D��������H�AH��_�_��������S���E1�L�%�>�A��_tmL������H��E�B��5@I��I�������I�EB�p�D����f��
��I��D���SE���SE���SEڨt����
GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864\*+?{^.$|()[GCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�zRx�<(F�G�B �A(�A0��
(D BBBA(;Z}�����		(*6L`n�.annobin_pcre_maketables.c.annobin_pcre_maketables.c_end.annobin_pcre_maketables.c.hot.annobin_pcre_maketables.c_end.hot.annobin_pcre_maketables.c.unlikely.annobin_pcre_maketables.c_end.unlikely.LC0.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..grouppcre_maketablespcre_malloc_GLOBAL_OFFSET_TABLE___ctype_tolower_loc__ctype_b_loc__ctype_toupper_locstrchr �������� "��������H#��������Y$���������
���������%�������� (   .symtab.strtab.shstrtab.group.rela.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.rodata.str1.1.comment.note.gnu.property.note.GNU-stack.rela.eh_frame@P`
h'p("@@�-�3�=�T8@�0	S�b�(]@0|�(�@00�2<
�0I-|vSv�xP����X�@` �	�	�x�pcre_newline.o/ 1575493209  1667  135   100644  4848      `
ELF>0@@
��SI��D�E��tVA���~MD��H�
L���?��[E#��A���t%L�W1��A���I����?��A	�9�u�tTA��
��~(A���tO|IA��( A��w<A��[�f�1�A��
|�A�[�f.�A��
t�A��
t1�[�1�E��[����A���H��H9�sG�
�E�A�3[�f�H���H9�s1��
����A�[��@�뾐��SI��H�G�D�O�E��tqD��D�Ƀ�@���uH����σ�@���t�D�ɀ��vD��?H�=L���[E#��A���tL�P1�f�A�:��I����?��A	�9�u�tdA��
~.A���tu|_A��( A��wR�A�[��A��}:A��
u0�H9�v1ɀx�
�����A�[�@A��
t2A��
t1�[øA�[�f�1�E��[����A���H9�v�x�
�E�A�3[�D���
GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864GCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�zRx�8?E��
CU
KO
AF
R^
J^
J8XOE��
Ik
EO
AM
CF
RZ
F�5Qq�����	?&8@O.annobin_pcre_newline.c.annobin_pcre_newline.c_end.annobin_pcre_newline.c.hot.annobin_pcre_newline.c_end.hot.annobin_pcre_newline.c.unlikely.annobin_pcre_newline.c_end.unlikely.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..group_pcre_is_newline_pcre_utf8_table4_pcre_utf8_table3_pcre_was_newline ��������'�������������������������� �   \@.symtab.strtab.shstrtab.group.rela.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.comment.note.gnu.property.note.GNU-stack.rela.eh_frame@
P`h
'p�"@ 
`-�3�=T8@�
0	STbT(]@�
0||�|(�@�
0�0�-|�S���P�(�(��@0�	�J@�pcre_ord2utf8.o/1575493209  1667  135   100644  4280      `
ELF>�	@@
�������;=~~D�@��L�
I���
H�JA9<�}
H�ʉ�I9�u�D�B�Lc�L�I��I�H��f���H������?�ɀ�JL9�u�ƒ���Hc�H�H�B
<�@�>�DE1�H��B
<�@�>�
GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864GCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�zRx���7Tu�����		�/A.annobin_pcre_ord2utf8.c.annobin_pcre_ord2utf8.c_end.annobin_pcre_ord2utf8.c.hot.annobin_pcre_ord2utf8.c_end.hot.annobin_pcre_ord2utf8.c.unlikely.annobin_pcre_ord2utf8.c_end.unlikely.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..group_pcre_ord2utf8_pcre_utf8_table1_size_pcre_utf8_table1_pcre_utf8_table2����������������&��������� ��������� �������� �   .symtab.strtab.shstrtab.group.rela.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.comment.note.gnu.property.note.GNU-stack.rela.eh_frame@
P`h
'p�"@�x-3= T8@`0	Stbt(]@�0|���(�@�0�0�-|�S���P�H�H0�@�x	�S	�pcre_refcount.o/1575493209  1667  135   100644  3840      `
ELF>�@@

��H��t0���G��9�~1�1�f�W�f��=��O‰���f�Wø�����
GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864GCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�zRx�??
7
T

u
�
����	?.annobin_pcre_refcount.c.annobin_pcre_refcount.c_end.annobin_pcre_refcount.c.hot.annobin_pcre_refcount.c_end.hot.annobin_pcre_refcount.c.unlikely.annobin_pcre_refcount.c_end.unlikely.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..grouppcre_refcount ?   .symtab.strtab.shstrtab.group.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.comment.note.gnu.property.note.GNU-stack.rela.eh_frame@
P`h
"p?(�.�8�T3@�0N](X@ 0w,�,(�@P0�0T-w�N���P����0�@��	���pcre_study.o/   1575493209  1667  135   100644  15648     `
ELF>`5@@
AW1�AVA��AUATUSH��(A����H�4$�D$��T$<_�H�_<b���t$�E1�A������D$H�-L�%���t$��<rwA��HcT�H�>���Cf����H����Tt�Hc�A�H����<rv�DA�H���Cf����H�\��C!�H����`�H���3u����A�����A�C"H��"�X�����T$H�4$H���������fA�H��f��Bf����H€:Tt�H�Z�B����H��H�XA���@���@E��x�T$���E9���<T��CE��H��E1��D$����Cf����H؀8T�Q���H�X�@���@A�H���Bf����H€:Tt�H�Z�B�`����{f����H<$�H����Bf����H€:Tt�H9��,H9��#�D$�CH������fD�D$���aE1��CH�s�H������H���3�,�����CH��f����D���E����f��CA����<H���H�\�����CH�Sf����A�E���M�C<��A��?H�5�H���@���A�H�E��t#�C�<�v��?H�5�H�f.��������Cf����A��C��<H���H�\�����fD�C��H�S<A�HB�H������A��H�SE��t�C<��6����CH������CA��H���o����H�C�R����E���N�CA��H���>���fD�C�c����A��H��!�����CH������@E�H�����D�T$H�4$�{���A�����E��<T�����D��H��([]A\A]A^A_��C"H��&f����A�����fD�CH������S�t$�D$H�<$f�����D�D$H��t@H��f.��Qf����Hр9Tt�H9�v/H9�s*�D$�E���H�_���H��(�����[]A\A]A^A_ËT$H�4$H�����A������H��(�����[]A\A]A^A_�f.�I�ʉ��A�����E��A��D��t$I����0tI��0�ȃ���A����D�f.�AWE1�L�=AVM��AUA��ATI��U��SH��H���?_�D$A��O�TM�A���<lw8��Ic�L�>��A�Bf����I�A�:Tt�I��A���<lv�@�D$�D$H��[]A\A]A^A_�DI��A�B��<��H�=��Hc�H�>���I��A�rI�NM�F��H������DA�D$A�f����I�A�<$T�)����r���f.���I�V����`���H��H=�u�I�����f��D$�fD��I�V��`���H��H=�u��h������I�V����`���H��H=�u��6���fD1��@	�������H��I�V�4�H��u܉с�	�@�s��@1��@��	ʈ������H��I�V��H��uځ���	ʈS���@I�V�T�H��H��`u����D�@I�V���T�H��H��`u��c���A�jI��������I������I�rE���t1�f��H��H�� u�A�B!�H���������H��3�.�����A�B"A�R#�������I��&�����A�rI�NM�F��H��L�T$�d���L�T$I�BE����A�RI�€��������?H��I����I��@A�Bf����I�A�:Tt�I������f�I��M��D���H��L��L�T$�4����������L�T$�A�Bf����I�A�:Tt�I���s���f�I�rE�������1���H��H��u︀�D���HcҀ<t����%�����?��S��=u�����L��M��D���H��L�T$���L�T$�������Z���f.�A�Bf����I�A�:Tt����fD�D$���A�rI�NM�F��H��L�T$����L�T$I�BE��t&A�RI�€���2����_���I���$����I�������I��"����������K��C����f�C�C�����@DI�V���T�H��H��`u�I������@f.�I�V�T�H��H��`u�I�����1��D��	ʈ���}���H��I�V��H��uځ���	ʈS��1���	����?���H��I�V�4�H��u܉с�	
�@�s�ڸ��I�V��`���H��H=�u�I�����@��AVAUATI��USH���dH�%(H��$�1�H�H���O�?ERCPH���@A�����u�G�oA��W���GHc�H�H������G
uH�G H�D$H���[f�H�D$H��E1�H��H��$�)�$�H�L$H��L�D$H@H�L$ �у���H�D$(��)�$�����S��A��A��H��H����Å�y1�E��uA�\�H����H�P0H�H�P�@0,E��uW�@4��x�H4�XXH��$�dH3%(��H���[]A\A]A^�fDH�I�$1���f�fo�$��@4fo�$�H8PH�DH�H�1��f.�H�5I�4$�k����H�L$1��H�D$�S����
GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864argument is not a compiled regular expressionunknown or incorrect option bit(s) setfailed to get memoryGCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�zRx�x�B�D�E �B(�A0�A8�D`�
8A0A(B BBBD�
8F0A(B BBBA\8F0A(B BBB�FH�\B�L�E �E(�D0�C8�GPj
8A0A(B BBBFH�F�B�B �D(�A0�G�c
0A(A BBBGS1Ki�����F��\��0���(	9J\r�@���.annobin_pcre_study.c.annobin_pcre_study.c_end.annobin_pcre_study.c.hot.annobin_pcre_study.c_end.hot.annobin_pcre_study.c.unlikely.annobin_pcre_study.c_end.unlikelyfind_minlengthset_table_bit.isra.0set_start_bits.LC0.LC1.LC2.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..group_pcre_OP_lengths_pcre_utf8_table4_GLOBAL_OFFSET_TABLE__pcre_find_bracketpcre_studypcre_mallocpcre_fullinfo__stack_chk_fail[��������b&��������'��������K'���������)�����������|U	'��������a
+���������
����������������#��������=,��������O-�������� S  x�����x| �$�(�,�0�4�8P<D@HD4H�L�P�T�X�\�`�dhl
p8t<x@|����L�P�������������l�����h�������x���������������� �����,�0���t@�H��� �$�(�,�0�4�8<@D�HXL�P�T�X�\�`�d�h�l�pt�x�|��8�����D�.�2�6�:�>�����d�h�l�p�t�x�|���H�$�������T�x�|������������������ �$(,�048 <$@$D(H�	L�	P4T8X@\@`�	d�h�lptx|���� �$�(�P�T�����`�d�d�h�9�t�T�|�|�`�d�h�l�p�t�x�|���	�������	h������ �$�(�,�0�4�8�<�@�D�H�L�P�T�X\ `�d�h�lptx|�6�e������ �x�| �����@.symtab.strtab.shstrtab.group.rela.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.rela.rodata.rodata.str1.8.rodata.str1.1.comment.note.gnu.property.note.GNU-stack.rela.eh_frame@P`h'pS"@HP-�3�=�T8@�0	Sb(]@�0|@�@(�@�0�h��@(��2W�2_�0t-|�S���P���@@�3`8P&	��H4pcre_tables.o/  1575493209  1667  135   100644  6160      `
ELF>�@@


GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864�M=%N+3<DMSUiBpsvC{���	�
��>���
��O����%-4=PNXQnR�S�T���D����� �!�E�"�#�U�����	�FG	$
V&)%3&;=
@'L?PSV(\He)p*|W�X�+�,������@�A����I�-��Y�J��./	K0)10283A4HZQ[Z5`6g7n8s9{:�;�L�<����AnyArabicArmenianAvestanBalineseBamumBengaliBopomofoBrailleBugineseBuhidCCanadian_AboriginalCarianCcCfChamCherokeeCnCoCommonCopticCsCuneiformCypriotCyrillicDeseretDevanagariEgyptian_HieroglyphsEthiopicGeorgianGlagoliticGothicGreekGujaratiGurmukhiHanHangulHanunooHebrewHiraganaImperial_AramaicInheritedInscriptional_PahlaviInscriptional_ParthianJavaneseKaithiKannadaKatakanaKayah_LiKharoshthiKhmerLL&LaoLatinLepchaLimbuLinear_BLisuLlLmLoLtLuLycianLydianMMalayalamMcMeMeetei_MayekMnMongolianMyanmarNNdNew_Tai_LueNkoNlNoOghamOl_ChikiOld_ItalicOld_PersianOld_South_ArabianOld_TurkicOriyaOsmanyaPPcPdPePfPhags_PaPhoenicianPiPoPsRejangRunicSSamaritanSaurashtraScShavianSinhalaSkSmSoSundaneseSyloti_NagriSyriacTagalogTagbanwaTai_LeTai_ThamTai_VietTamilTeluguThaanaThaiTibetanTifinaghUgariticVaiYiZZlZpZs�����������������!!GCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�
3
N

m
�
����� @�&�x8`@J�\�n����s.annobin_pcre_tables.c.annobin_pcre_tables.c_end.annobin_pcre_tables.c.hot.annobin_pcre_tables.c_end.hot.annobin_pcre_tables.c.unlikely.annobin_pcre_tables.c_end.unlikely.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..group_pcre_utt_size_pcre_utt_pcre_utt_names_pcre_ucp_gentype_pcre_utf8_table4_pcre_utf8_table3_pcre_utf8_table2_pcre_utf8_table1_size_pcre_utf8_table1_pcre_OP_lengths   .symtab.strtab.shstrtab.group.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.rodata.comment.note.gnu.property.note.GNU-stack@
P`h
"p(p.p8pT3@`0N�]�(X@�0w���(�@�0� s �0�
-w�
N�
��
P��	����/19             1575493209  1667  135   100644  4176      `
ELF>�	@@

H��H��H����H	��@f.���1��?PCRE��H���oI��I���oON�oW V �GȉF�G�ȉF���f�FA�x���f�FA�x�v���f�FA�x�h���f�FA�x�Z���f�FA�x�L���f�FA�x�>���f�FA�x�0���f�FH��M��t:�Ao�AoaaI�Q H�Q A�Q(�Q(A�ʉA�QʉQA�Q(ʉQ(H���D�
GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864GCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�zRx�0V�!
=
]

�
�
���+ .annobin_pcre_try_flipped.c.annobin_pcre_try_flipped.c_end.annobin_pcre_try_flipped.c.hot.annobin_pcre_try_flipped.c_end.hot.annobin_pcre_try_flipped.c.unlikely.annobin_pcre_try_flipped.c_end.unlikelybyteflip.part.0.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..group_pcre_try_flipped !   4 .symtab.strtab.shstrtab.group.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.comment.note.gnu.property.note.GNU-stack.rela.eh_frame@P`
h"p!(�.�8�T3@(0N�]�(X@X0w�(�@�0�08-weNe�hP����H�@�0�	�=��pcre_ucd.o/     1575493209  1667  135   100644  60576     `
ELF>`�@@


GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864																										











																														 !"#$%&$'()***++,-.----/01/01/012/0134567899:;<=>?@ABBCDBEFGHGIJKLMMMNOOPQ-RRRRRRRRRSSSSSSSSS



SSSSSSSSSSSS













RRRRR






S
S
















TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTVWVWSXVWYYZ[[[YYYYYX
\]]]Y^Y__`aaaaaaaaaaaaaaaaaYaaaaaaaaabccc`dddddddddddddddddedddddddddfgghijkkklmnVWVWVWVWVWopopopopopopopqrs`tuvVWwVW`xxxyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{||||||||||||||||}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~��TT���}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~�}~}~}~}~}~}~}~�}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~YYYYYYYYYYY��������������������������������������YY�������Y���������������������������������������Y�YYYYYY�������������������������������������������������������YYYYYYYY���������������������������YYYYY�����YYYYYYYYYYYYY��������������������YY�Y�������������������������������S����������TTTTTTTTTTT���������Y������T������������������������������������������������������������������������������������������������������������������������������������������������������������Y������������������������������������������������������������YY�����������������������������������������������������������������������������������������������������YYYYYYYYYYYYYY�����������������������������������������������������������YYYYY����������������������������������������������YY���������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY����������������������������������������������������������YY�������������������Y�TT���YY������������������������YYYYYY�������Y���Y��������YY��YY����������������������Y�������Y�YYY����YY���������YY��YY����YYYYYYYY�YYYY��Y�����YY����������������������YYYYY���Y������YYYY��YY����������������������Y�������Y��Y��Y��YY�Y�����YYYY��YY���YYY�YYYYYYY����Y�YYYYYYY����������������YYYYYYYYYYY���Y���������Y���Y����������������������Y�������Y��Y�����YY����������Y���Y���YY�YYYYYYYYYYYYYYY����YY����������Y�YYYYYYYYYYYYYYY���Y��������YY��YY����������������������Y�������Y��Y�����YY���������YY��YY���YYYYYYYY��YYYY��Y�����YY��������
����YYYYYYYYYYYYYYYY��Y������YYY���Y����YYY��Y�Y��YYY��YYY���YYY������������YYYY�����YYY���Y����YY�YYYYYY�YYYYYYYYYYYYYY���������������������YYYYYY���Y��������Y���Y�����������������������Y����������Y�����YYY��������Y���Y����YYYYYYY��Y��YYYYYY����YY����������YYYYYYYY��������YY��Y��������Y���Y�����������������������Y����������Y�����YY���������Y���Y����YYYYYYY��YYYYYYY�Y����YY����������Y

YYYYYYYYYYYYYYY��Y��������Y���Y�����������������������Y����������������YYY��������Y���Y����YYYYYYYYY�YYYYYYYY����YY����������������YYY�������YY��Y������������������YYY������������������������Y���������Y�YY�������YYY�YYYY������Y�Y��������YYYYYYYYYYYYYYYYYY���YYYYYYYYYYYY����������������������������������������������������������YYYY����������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY��Y�YY��Y�YY�YYYYYY����Y�������Y���Y�Y�YY��Y�������������Y���YY�����Y�Y������YY����������YY��YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY������������������������������������������������������������������������Y������������������������������������YYYY���������������������������YYYY��������Y������������������������������������Y���������������Y�������



YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������YYYYYYYYYY��������������������������������������������YYY���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Y����YY�������Y�Y����YY�����������������������������������������Y����YY���������������������������������Y����YY�������Y�Y����YY���������������Y���������������������������������������������������������Y����YY�������������������������������������������������������������������YYYY������������������������������YYY��������������������������YYYYYY�������������������������������������������������������������������������������������YYYYYYYYYYY����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYY																		


YYYYYYYYYYYYYYYYYYYYY












Y


YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY����������������������������������������������������������������������YYYYYYYYYYYYY   !!!!  !!!YYYY!! !!!!!!   YYYY"YYY##$$$$$$$$$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%YY%%%%%YYYYYYYYYYY&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&YYYY'''''''''''''''''&&&&&&&''YYYYYY(((((((((((YYY))********************************+++++++++++++++++++++++,,---YY../////////////////////////////////////////////////////0101111111Y10100111111110000001111111111YY12222222222YYYYYY2222222222YYYYYY33333334333333YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY5555677777777777777777777777777777777777777777777777565555565666665667777777YYYY88888888889999999::::::::::555555555:::::::::YYY;;<==============================<;;;;<<;;<YYY==>>>>>>>>>>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY????????????????????????????????????@@@@@@@@AAAAAAAA@@AAYYYBBBBBCCCCCCCCCCYYY???DDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFGGYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYTTTTTTTTTTTTTTTTHTTTTTTTIIIITIIIIHYYYYYYYYYYYYY`````JRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRZZZZZ`````KLMRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRZTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTYYYYYYYYYYYYYYYYYYYYYYTTTNOPPPPPPPPQQQQQQQQPPPPPPYYQQQQQQYYPPPPPPPPQQQQQQQQPPPPPPPPQQQQQQQQPPPPPPYYQQQQQQYY`P`P`P`PYQYQYQYQPPPPPPPPQQQQQQQQRRSSSSTTUUVVWWYYPPPPPPPPXXXXXXXXPPPPPPPPXXXXXXXXPPPPPPPPXXXXXXXXPP`Y`Y``QQZZ[X\XXX`Y`Y``]]]][XXXPP``YY``QQ^^YXXXPP```s``QQ__wXXXYY`Y`Y````aa[XXYbbcdYYYYYRYYRYRRRRRYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYTTTTTTTTTTTTTeeeeTeeeTTTTTTTTTTTTYYYYYYYYYYYYYYY

f



f

gfffggfffg
f


fffff





f
h
f
ijff
gffkfgIIIIg

ggfffgggg


l
mmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnoooooooYYYYYY





















































































































































































































































































YYYYYYYYYYYYYYYYYYYYYYY






































YYYYYYYYYYYYYYYYYYYYYYYYY










YYYYYYYYYYYYYYYYYYYYY

























ppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqq


































































































































































































































































































































































































































































Y


















Y
YYYY























Y



Y



YY



























Y


































Y
Y



YYY








YY







YYY























Y













YYYYYrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr

















































YYY









YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYsssssssssssssssssssssssssssssssssssssssssssssssYtttttttttttttttttttttttttttttttttttttttttttttttYuvwxyz{|}R~~opopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopop������opop���YYYYYYY���������������������������������������������YYYYYYYYYY������������������������������������������������������YYYYYYYYY�YYYYYYYYYYYYYYYY�����������������������YYYYYYYYY�������Y�������Y�������Y�������Y�������Y�������Y�������Y�������Y��������������������������������SYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY��������������������������Y�����������������������������������������������������������������������������������������YYYYYYYYYYYY����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYY











YYYY
�I�


���������TTTTTTSSSSS

����I

Y��������������������������������������������������������������������������������������YYTT

���������������������������������������������������������������������������������������������S���YYYYY�����������������������������������������YYY����������������������������������������������������������������������������������������������Y











������������������������YYYYYYYY



































YYYYYYYYYYYY�����������������������������������������������Y






































�������������������������������























































�����������������������������������������������Y����������������������������������������������������������������������������������������







































��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������YYYYYYYYYY































































����������������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������YYY�������������������������������������������������������YYYYYYYYY����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYY}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~YY}~}~}~}~}~}~������YYYYYYYY���K}~}~}~}~}~}~}~}~}~}~}~}~YYYYYYYY����������������������������������������������������������������������������������������YYYYYYYY






















SSSSSSSSS

R�S

YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY-----��������������������������������������������YYYY


YYYYYY��������������������������������������������������������YYYYYYYY���������������������������������������������������������������������YYYYYYYYY������������YYYYYY����������������������������YYYY������������������������������������������������������������������������������������YYYYYYYYYYY������������������������������YYY������������������������������������������������������������������������������Y�����������YYYY��YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY�������������������������������������������������������YYYYYYYYY��������������YY����������YY��������������������������������YYYY�������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYY�����YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY����������������������������������������������YY����������YYYYYY������������������������������������YYYYYYYYYYYY�����������������������YYYY�������������������������������������������������YYYY��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������YY��������������������������������������������������������������YY����������������������������������������������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY�����YYYYY��������������������������Y�����Y�Y��Y��Y������������������������������������������������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������YYYYYYYYYYYYYYYY����������������������������������������������������������������YY�
�����������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY�������������
YYTTTTTTTTTTTTTTTTYYYYYYTTTTTTTYYYYYYYYYYYYYYY�����Y���������������������������������������������������������������������������������������������������������������������������������������YYY																										

����������S���������������������������������������������SS�������������������������������YYY������YY������YY������YY���YYY

Y


YYYYYYYYYY

YY������������Y��������������������������Y�������������������Y��Y���������������YY��������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY���������������������������������������������������������������������������������������������������������������������������YYYYY
YYYYYYY








���������������������������������������������������������������������������YYYYY











YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY












































TYY�����������������������������YYY�������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY�������������������������������Y����YYYYYYYYYYYY���������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY������������������������������Y�������������������������������������YYYY��������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY��������������������������������������������������������������������������������������������������������������������������������������������������������������YY����������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY������YY�Y��������������������������������������������Y��YYY�YY�����������������������Y���������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY����������������������������YYY���������������������������YYYYY�YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY����Y��YYYYY��������Y���Y���������������������������YYYY���YYYY���������YYYYYYYY���������YYYYYYY��������������������������������������������������������������������������������������YYY�����������������������������YY���������������������������YYYYY���������������������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY�������������������������������Y������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY





















































































































YYYYYYYYYY






































YY



























































HHTTT


HHHHHHTTTTTTTT

TTTTTTT





























TTTT















































YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY�������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY






















































































YYYYYYYYYYYYYYYYYYYYYYYffffffffffffffffffffffffffggggggggggggggggggggggggggffffffffffffffffffffffffffgggggggYggggggggggggggggggffffffffffffffffffffffffffggggggggggggggggggggggggggfYffYYfYYffYYffffYffffffffggggYgYgggggggYgggggggggggffffffffffffffffffffffffffggggggggggggggggggggggggggffYffffYYffffffffYfffffffYggggggggggggggggggggggggggffYffffYfffffYfYYYfffffffYggggggggggggggggggggggggggffffffffffffffffffffffffffggggggggggggggggggggggggggffffffffffffffffffffffffffggggggggggggggggggggggggggffffffffffffffffffffffffffggggggggggggggggggggggggggffffffffffffffffffffffffffggggggggggggggggggggggggggffffffffffffffffffffffffffggggggggggggggggggggggggggffffffffffffffffffffffffffggggggggggggggggggggggggggggYYfffffffffffffffffffffffffgggggggggggggggggggggggggggggggfffffffffffffffffffffffffgggggggggggggggggggggggggggggggfffffffffffffffffffffffffgggggggggggggggggggggggggggggggfffffffffffffffffffffffffgggggggggggggggggggggggggggggggfffffffffffffffffffffffffgggggggggggggggggggggggggggggggfgYY











































YYYY



































































































YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY






























YY
YYYYYYYYYYY
Y
YY
YYY
YYY




YYYYYYYY
YYYYYYY
YYYYYYYYYYYYYYYYYYYYYYYYY
Y

YY
YYYYYYYYYY



YY
YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY

































YYYYYYYYYYYYYY








YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY���������������������������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY�����������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTYYYYYYYYYYYYYYYY������������������������������������������������������������������������������������������������������������������������������YY	

 !""#$%&'((()*+,-./0123456789:;<=>?@AABCDEFGHIJKLLAMAANOPQRSTUVWXYZ[\]F^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^_^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^`abbbbbbbbcddefghijklmno"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""pqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr^^stuvwwxyz{|}~�������������������������������F������������������^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^�^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^�^^^^����rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr�rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr�									
!	 		!��	!				�	!y!	!����!	9���!���!	����!���!�!	�!	�!	�!	O!	�!	�!	�!a!	�!	�!�!	�!�!	�!	�!	�!	�!!8!	!����!����!����!	����!	���!	~���!	+*!	]���!	(*!?*!	=���!	E!	G!*!*!*!.���!2���!3���!6���!5���!1���!/���!-���!�)!�)!+���!*���!�)!&���!����!'���!����!%���!	T	����	�	&	%	@	?	 ������������������	������	����������
	
������������	�������	����	~���	P	 ������	����	�	0���
111177?
?????YYYY







++
++
+555
5
5556
666
66

$
$$$
$$/
///88888
    
99999
9999
&&
&&
&&	`((((--2233

%%%%%
%%"""
"""
4''
'
'
ZZ
ZZ
ZZ==
==
==KK
KK
EE
EEE
H
HHH	
	!�!�!���!	A�	����JVd�p~����		����������	����	����	����	����	����						��!	A��!	���!	!��!!�!		��	0���!		��!	�!	��!���!���!	���!	��!	���!	���!	���




��::<<<UUULLLL
NNNN!	�u��000
0@@J
JJJJ
D
DDDIII
ISS
SSSS
CCC
C
C&[[[[VV
VVV
		#FB));;***
	(
���.,,
PPPAAAGGWWWMMRRQQXTT
TTT>>>OGCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�
-
E

a
~
��������"
�0.annobin_pcre_ucd.c.annobin_pcre_ucd.c_end.annobin_pcre_ucd.c.hot.annobin_pcre_ucd.c_end.hot.annobin_pcre_ucd.c.unlikely.annobin_pcre_ucd.c_end.unlikely.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..group_pcre_ucd_stage2_pcre_ucd_stage1_pcre_ucd_records   .symtab.strtab.shstrtab.group.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.rodata.comment.note.gnu.property.note.GNU-stack@
P`h
"p(p.p8pT3@��0N�]�(X@ �0w���(�@P�0� 0� �0P�-w}�N}����P�����	�����/39             1575493209  1667  135   100644  4392      `
ELF>h
@@
���V����I����UH��SH�D�E��A����A�����L�у�?�9�������)�H�pA���P�Ճ�@����
�������~��tYH���@�����u=H����)�ʅ��A�S�E���e���������A��H��A�S�E���G�����f�)�[]�A��>��H��A�S�E��� ����A���toA���ku���wd��d���DA���tWA���u���w>��>�����?�����t=H��H���8u�H)��ƍP����f���0u���[])��@�� t���ø�����
GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864GCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�zRx�<�X�D��
ADp��h��H
AGO���;Z}����	�&.annobin_pcre_valid_utf8.c.annobin_pcre_valid_utf8.c_end.annobin_pcre_valid_utf8.c.hot.annobin_pcre_valid_utf8.c_end.hot.annobin_pcre_valid_utf8.c.unlikely.annobin_pcre_valid_utf8.c_end.unlikely.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..group_pcre_valid_utf8_pcre_utf8_table4�������� �   .symtab.strtab.shstrtab.group.rela.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.comment.note.gnu.property.note.GNU-stack.rela.eh_frame@
P`h
'p�"@�-3=T8@�0	Slbl(]@	0|���(�@0	0�0�-|�S���P�@�@X�@`	��	�8x	�pcre_version.o/ 1575493209  1667  135   100644  4024      `
ELF>�@@
��H��
GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h8648.02 2010-03-19GCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�zRx�5Qq������	.annobin_pcre_version.c.annobin_pcre_version.c_end.annobin_pcre_version.c.hot.annobin_pcre_version.c_end.hot.annobin_pcre_version.c.unlikely.annobin_pcre_version.c_end.unlikely.LC0.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..grouppcre_version
��������    .symtab.strtab.shstrtab.group.rela.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.rodata.str1.1.comment.note.gnu.property.note.GNU-stack.rela.eh_frame@P`
h'p"@�-|3|=|T8@0	S�b�(]@@0|���(�@p0�2 �00-|]S]�`P����0�@��	���pcre_xclass.o/  1575493209  1667  135   100644  5496      `
ELF>8@@
��AWAVAUATUS��ˆD$�����������B���GI����H���������0H�F!@��D�WL�-��DI��L�%H�-��H�L�=D�A��A��Mc�A)Ӑ������H�������C�4��D�Hc�A�tuH�t����NHc�H�>��H�F���\���H�F!�S����v@8p������8�ucH����������D$�[]A\��A]A^A_�f��vL�5�HA9���v�N�����@��@��	���8�u���D$��Ѓ�[]A\A]A^A_���u����6�d������H�����Y9���������L�@�����I�@E�D��A�����9������A9�������o������?H�D�
C�@C#4�E�p����E��t�E1�I��fDI��A�P�����?��	�D9�u�J�D0�U���fDH�F�����H�
��?D�C�IG#�E�q��D�t$�A��E���M���E1�I��@I��A�Q�����?��A	�D9�u�T$�H�D�������?H���RA#4�D�r���������E1�L��DH���B�����?��	�A9�u�O�D0���1��R���
GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864GCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�zRx�\8F�B�B �B(�A0�A8�#
0A(B EBBCK
0A(B BBBD83Nm�����	�8
,>Pb.annobin_pcre_xclass.c.annobin_pcre_xclass.c_end.annobin_pcre_xclass.c.hot.annobin_pcre_xclass.c_end.hot.annobin_pcre_xclass.c.unlikely.annobin_pcre_xclass.c_end.unlikely.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..group_pcre_xclass_pcre_ucd_stage2_pcre_ucd_stage1_pcre_ucd_records_pcre_utf8_table3_pcre_ucp_gentype_pcre_utf8_table4a��������q ��������x!���������
���������"��������G#��������$���������$���������$�������� 8  �\H� .symtab.strtab.shstrtab.group.rela.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.rela.rodata.comment.note.gnu.property.note.GNU-stack.rela.eh_frame@P`
h'p8"@H�-�3�=�T8@ 0	S�b�(]@P0|$�$(�@�0�L�@�x�0`-|�S���P����x�@(
Xx	�	t@
�/58             1575493209  1667  135   100644  4752      `
ELF>P@@


GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864	

 !"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\]^_`abcdefghijklmnopqrstuvwxyz{|}~�����������������������������������������������������������������������������������������������������������������������������	

 !"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~�����������������������������������������������������������������������������������������������������������������������������>�~~�����������������������������������������x������������������GCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�
;
Z

}
�
���@.annobin_pcre_chartables.c.annobin_pcre_chartables.c_end.annobin_pcre_chartables.c.hot.annobin_pcre_chartables.c_end.hot.annobin_pcre_chartables.c.unlikely.annobin_pcre_chartables.c_end.unlikely.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..group_pcre_default_tables   .symtab.strtab.shstrtab.group.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.rodata.comment.note.gnu.property.note.GNU-stack@
P`h
"p(p.p8pT3@�
0N�]�(X@0w���(�@@0� @ �0`-w�N���P����	�	*p�usr/lib64/libpcre.so.0.0.1000075500000630130150403561430010722 0ustar00ELF>�@)@8@�� h�h�"h�"�� ����"��"��$$���  P�td � � ���Q�tdR�tdh�h�"h�"��GNU ��j':z"][��:�lD(;s%	�  @�PH�P�
A @b	�����(LP� �	 " !"#&()*,.1567:<=>@A.W���&�t�5υ����q3����tq\��7��{��NE���cGs�ng������씒��"_n��1�q9���]��:���7�!�2Vs�:��Hu���qX����#:�vTi��|-S>EX[��^O؊����.p��0BE��w�֓���ˇ� ,W�1gb-W�<��(f��'/W�m�� k��VB Ow:, F"K_U`�@ys�`����?���
������O���?�#��#���(#�0�Sr#1���pЮg% �j� ����~��]0�@�����"S0���@�Y���p<t� #�������0�`�q0#=�&#~��@+���������I� #�@�/��i���
����x������;P�(g��__gmon_start___ITM_deregisterTMCloneTable_ITM_registerTMCloneTable__cxa_finalize_pcre_utf8_table4_pcre_utf8_table3_pcre_OP_lengthsstrncmp__stack_chk_fail_pcre_is_newline_pcre_ucd_stage1_pcre_ucd_stage2_pcre_ucd_records_pcre_utt_size_pcre_utt_pcre_utt_namesstrcmp_pcre_find_bracket_pcre_ord2utf8memmove__memcpy_chkmemcpymemcmppcre_compile2strlenpcre_malloc_pcre_default_tables_pcre_valid_utf8pcre_freepcre_compilepcre_config_pcre_ucp_gentypepcre_callout_pcre_was_newline_pcre_xclasspcre_dfa_exec_pcre_try_flippedpcre_execpcre_fullinfopcre_get_stringnumberpcre_get_stringtable_entriespcre_copy_substringpcre_copy_named_substringpcre_get_substring_listpcre_free_substring_listpcre_get_substringpcre_get_named_substringpcre_free_substringpcre_stack_freepcre_stack_mallocpcre_infopcre_maketables__ctype_tolower_loc__ctype_b_loc__ctype_toupper_locstrchr_pcre_utf8_table1_size_pcre_utf8_table1_pcre_utf8_table2pcre_refcountpcre_studypcre_versionlibc.so.6_edata__bss_start_endlibpcre.so.0GLIBC_2.14GLIBC_2.3.4GLIBC_2.4GLIBC_2.2.5GLIBC_2.3����ti	#ii
/ui	9ii
Eh�"�p�"`x�"x�"X�"`�"1h�"p�"x�"!��"6��"*��"��"��">��"%��"?��"C��"�"'�")�"-�"�"�"��"@###
#
��"��"7��"��"(��"&��"#��"��"�"�"�"9�"�"	�"<��"+�"
�"�"�". �"4(�"0�"8�"0@�"H�"P�"��H��H��"H��t��H����5��"�%��"��h�������h��������h�������h�������h�������h�������h�������h��q������h��a������h	��Q������h
��A������h��1������h��!������h
��������h��������h������h�������h��������h�������h�������h�������h�������h�������h��q������h��a������h��Q�������%-�"D���%%�"D���%�"D���%�"D���%
�"D���%�"D���%��"D���%��"D���%��"D���%��"D���%��"D���%��"D���%��"D���%��"D���%��"D���%��"D���%��"D���%��"D���%��"D���%��"D���%��"D���%��"D���%}�"D���%u�"D���%m�"D���%e�"DH�=)�"H�"�"H9�tH�N�"H��t	�����H�=��"H�5��"H)�H��H��H��?H�H�tH���"H��t��fD�����=��"u+UH�=z�"H��tH�=��"�����d������"]������w����L�AVAUA��ATU��SI�YA�AA����=�����L�~�"��?E�
L���"C�[C#�M��I���E�c�E1���E��tqf�I��E�J���A��?A��D	�D9�u�N�#J�\#��tH�HЃ�JwFLc�L���G�ZE��uD��H�1L�5��A��Oc�M�>A��fDI����u��[]L�A\A]A^�fD[D��]L�A\A]A^�E��uOD�L���C�L��M��t&@I��E�X���LH�L��C�u���1��	�n9��fD�S�A��7��A��0��PЀ��{����SB�D�ЍJЀ��XL�K�D��E���C���=��8����3�-����<{�cL�E�Hc�A���I�q��`����@�7�0N�A�I)�A�
��I������`�=�D�7)����D�A��<��A��'��E1�L�ƾA��{�E1�A��-uE�AA�I��A��H��A��
1��	fDM����M�AD�AA�HA�C�Hc�A�*u߅�xlE���|I����}�l�9�������u-�I�������%�����@��������H��P�I�ك�Bƒ�@�����=M�����H��E1�1�A�0L�ӽ�7�Lc�C�tH����uQ��0uL�1�Lc�C�u�}t1��o�����xA��I��҃��A9��S����"I���E����A������`~
�D�7)��Ѓ�A��AL�)��o���D�CH�KA��}�E���A���H�������<}��<-t�A�u޸������L�K�1����M�ȅ�tmE����9����M�����Ⱥ0��A�7M�)��x����� �7�{����=��M���[���E��tI��1�A��}�#���M���:1�M���3���I��A�������M��)�����A��1��D)غ��M��)‰����I����I�����I��A�L���E�����������H���8@��StDwZ@����@����@������t�xD�A1�A��t�>H���8@��Su�f�L�i�"A�<8H���@��YrV@��[w<��tL�D�@fA��E��L��8@��Tt�L��"A�<8H��U���f�D�G�A��v��D��u����AWA��A�����AVI��AUA��ATE1�UH�-b�SH�_H���G�<hw>��HcT�H�>��fD�Cf����H��<Tt�H���"�H��<hv�H�������[]A\A]A^A_�f.��Cf����H�\��C!�H���wD�H���3u���t/�C"�S$f��f����f9�u��C&A�H��&�G����H��!A���3��������H��[]A\A]A^A_�@H��H�XA���@����@A���t	E9��3���<T���CE��H��E1�����fD<_H�{L��D��HE�D��D�D$�����x�D�D$A�H��@�Bf����H€:Tt�H�Z�B���E���3�{f����I~(H��D�Bf����H€:Tt�H9�v	H9������H��L��D��D�D$�������A�H���D�D$�
���f.��CH�Sf����A��C��<HB��CH������@�CH�Sf����A�A��t]�C<�vUH�
>�"��?�H�����@A��H�SA��t�C<�w��CH���k���H�C�J�����CH���L���D���������������f.�AWI�ϹAVA��1�AUL�-�ATUH��1�SH���L�%��"A�H���I��H9���A��؃�X���S�������r����R�I�S�����E�HfA��fE��tE��`��E��M�A���T��M��Hc۹1�1�A�<L��d���I��H9��r���H���[]A\A]A^A_�@A�@f����I�A���Tt��fD��<Qw���IcD�L�>��Hc�A�I�DA�@f����I�A���Tt��^���M��L��L��D��H��L�$���L�$��t A�Af����I�A���Tt�����A�Af����I�A�9Tt�H��1�[]A\A]A^A_�f�E�HfA��E��MO(fA�y�����L��D��H��L��L�$L�L$����L�$�������L�L$A�Af����I�A�9Tt��A�@f����I�L����H����l����H��3�b������T���f�z�O����D����E���:���A�@<��-���H�;�"��?�I�����@A�@I�P��<LB����f�I�P!�i����E������A�@<�������A�@I�P��<LB����f�AWI��AVA��AUA!�A��ATA��USH��H���H�-��"�_@��X������]���Jǃ�����B���B���v�ux��us�A�Gf����I�A�<T���|1ɺH��L��~�����у�^tL��_�{����H���f����f�� D��C�D	�H��H���"�����u�H��1�[]A\A]A^A_�DD��D���Ӏx
u�E���a������H���[]A\A]A^A_�@AWAVI��AUA��ATA!�A��U��SH���H���"�s�������`����^�}��_����Xto��]tj�Bǃ�����B������@A�Ff����I�A�<T�E�<L���Ѐ�St	v���d<w�Hc��H������D���.�����u�H��[]A\A]A^A_�H���f��O����f����f�� C�D	�������f����@�u	E���Q���H��1�[]A\A]A^A_��GL�<Su�CSI�A���d��vЉ�D��L��������b���@A�Gf����I�A�?Tt�I���Ѐ�Stw#�������H����Ѐ�Su�Hc��H��у�d<�w���Hc��H��f�H���[]A\A]A^A_�f�f.�AVI��AUA��ATI��UH�-�S�������I�~L�������Q�Dwb��HcT�H�>��@H��E��tG�@���tN9�u:A�Ff����I�A�>Tt��(1Ҁ�XH�����p�����x���uɉ����������[]A\A]A^ÉÀ�A�$D�말AWI��AVAUE1�ATM��UD��SH��8E�H�T$�L$H�H�4$dH�%(H�D$(1�H�L$ ���(�b����H�D$ E��D�T$H�l�H�D$�\���[����#u
@������(�o��)��H�D$ ��|uE����H�HH�L$ �P���+��\u�H�AH�D$ �Q��t ��Qu��H��H��H�D$ ���u5I�A�����H�\$(dH3%(D���H��8[]A\A]A^A_�fD��\u�H�AH�D$ �yEu��g����H�t$ 1Ґ�NH��\ti������^��H�FH�D$ �N��\tA��]�H�T$ H�BH�D$ �J��]���������\�H����@��xEH�pt�H���������u�H�pH�t$ �_�����L$H�T$M��A��H�4$H�|$��������H�D$ �8�}�������A�$�|$A9�A�<$DL��`����H�D$ H�P�DH����
�;���H�T$ �
H�Є�u��r����H�<$H�P�Gs����������A<?�t<*tHE�BE�D;D$uH�|$�2���H�AE1�H�D$ �QH���\���fDH������H������QH�AH�D$ ��(�W�Q��Pu
H�AH�D$ �Q��<��H��E1�'����E�BE�$D;D$uH�|$�����H�p�>H�t$ �8��<D��P9���H��f�H��H�D$ H���9�u�H�|$tHcT$H)�H9��9H�L$ E1�����A������5���H�BH�D$ �J������H�€�Q������H��H��H�D$ ��������\u�H�AH�D$ �yEu�����Q��|����H�AA�H�D$ �QH�����E��t
E94$}E�4$I�A���������y?�QtF��t%��)t H���f���)tH�L$ �H��H����u������H�HE1�H�L$ �P���H��E1��~����H��=t	��!�j���H��E1��i���A������H�|$D�D$D�T$���D�D$�������H�L$ E1�D�T$��#���H���p������AWI��AVA��AUA��ATUSH��H��(dH�%(H�D$1�H�G0L�d$H�l$�D$H�D$�f�H�T$H�JH�L$�:tM��E��D��L��H��H���F�����~�H�t$dH34%(uH��([]A\A]A^A_��N�@f.�AWE��AVAUE��ATA��US��H��8H�L$L�t$pL�D$A��|$dH�%(H�D$(1�L��A���I�vI����������#t��f�H9���H��H�xH�|$�H����A���I�V8��u�Mc��L)�L��H9�w�A8��u�A��t
A���8Hu�Hc�I�vH�H�D$����p���H�PH�T$�HH���N���fDA��H���-�H�|$���Y���A�����I�vH������@��\�I�V�<
��L�@L�D$�(��tY���~OH�
P�"��?H�5��"�<�#,��w����t)H��1�H�D$�P���H����?��	�9�u�M�D0A�E����I�NM��������<#t�f.�H9��I��I�xH�|$A�@���.A���I�V8��u�Ic��H)�H��H9�w�A8��u���tA���A8@u�L�7I�NL�D$A����p���I�PH�T$A�@I���M���D<*td<?t`�H�=.�L�������tF�D$��������w2H���Hc�H�>��I�VHc��*uf.�1�H�\$(dH3%(��H��8[]A\A]A^A_�A��L���
�H�|$������Ic������@I�NI�����A�V`H�t$$E1�D��H�|$�D$$���ŋD$$���u���H�D$L�@L�D$A�E�����"������L���H�-�Hc�H�>��D9��/���A���#�����������I�FHc��,(1�A9������A���0���'H�\$D�#D��A���~DH���"��?�4H���"�vD#$��A��tH��1�H�����H����?��A	�9�u�D9����������I�FHc��,(1�A9����f���A��~��t�H�\$D�#D��A���~�H��"��?�4H�G�"�vD#$��A��t�H��1�H�����H����?��A	�9�u�덁���#���
���1��|$�������������I�FHc��(����������������I�VHc��*�����������q�M�� �b�|����P����D1��|$���@�������2���I�FHc��(�Ѓ������������I�FHc��(��������������I�VHc��*��������A��~^��tYH�\$D�#D��A���~DH�q�"��?�4H���"�vD#$��A��tH��1�H�����H����?��A	�9�u��؃����\���H���Hc�H�>��E���������	����4��������&�����H�����������A��/ �6�IA��_ �#A��0�1�����������U
���������1����������1���������E���������	�������E���������	����r����M1����d�����H����T���1�������G���A��( A���G1�������)�����/ t~#��_ t��0�����1��|$������� ��
�������	������Ձ�( �����1��|$������D9���������*���I�FHc��(D9����������H�u�"���������H���"�BH�*�"l�����H�@�"��H��������H�v�"H��BH���"l��!���I�FHc��(D9����������:��A���t�����A��
A�������1����������A������I�FMc�B� �Ѓ����A�������I�VMc�B�"���������A�������I�FMc�B� �������n���A���b���I�VMc�B�"�M����F����A���9���I�FMc�B� ����������A������I�VMc�B�"��������A��t$����A�� t~#A���t
A������1���������A��	������A�� A��
�������@f.�AWAVAUATUSH��XH�H�T$H�|$�PL�pH�L$dH�%(H�L$H1Ʌ�����{���T$ �D$!H�D$1�L�l$ L�0H�B�"�(�<fD�+H�=�"�Hc�I��H�@L�<GL��A�7H5 �"�[����tG]D��9�|�H�D$�/H�D$L�0�����H�L$HdH3%(��H��X[]A\A]A^A_�A�GH�L$�A�G��DA�\$럐�P��^tT1����}t>�T H��H��tA�V��Hc�I����u�H�D$�.H�D$L�0������g����D4 ����L�p�P��>��@f.�AW��I��AVI���AUI��ATA��UA���S�A��…�t^��R����Pt`L���"�H�H�A���wbH��H�������?tA�ZI�J�S�LB�I�A��…�u�[]A\A]A^A_�DA�Bf����I��t���@I�E���d�����+�[���H���H��L���J���A�B�<��=���H���"��?�I��'����A�ZI�J�S�LB��c����M�I�]M9�sBA�M�zL��f������H�I9�u�UD�f������H�I9�t;H��L9�r�A�Bf������H�H9�w����A�BA�ZI�������"�JI9w�I�]빐f.���I���A�@D�D��E��tnA��PtnA��\�|A��_��L�
��"A�H�Mc�G�����L��H�����?t�GH�O��<HB�L�D�D��E��u�1���Gf����H��m���D����H�a�"�@\H��M���D�Gf����9���H�6�"�@_H��"���f.�L�<+�
���������M�щ�I��M�����G�<����H�
��"��?�H�����f.��GH�O��<HB�����f.�H���f�f.�AWAVAUATUSH���H�H��$ D��$��|$L��$8H��$�H��$(�t$`L�	H��$�H��$@H��$�H��$�L�D$xH��$�dH�%(H��$�1�A�CpH��$�H��$h��$�H��$0H��$`��$����$0DŽ$ �;_uB�CE1�fD��$Zf�$f����f��$XI�C@��$ H��$PH��$PI�C@��$E1�H��$�HDŽ$�H��$��PfD�v�A�{`Hc�H��$�L�4H��$0��$�HE‰�$�M��H�D$0�������$���$��D��$E��t��$�A�C`�D$��;D$`t��$0I��A�D$�A�D$���$���t*I�D$�\A�D$I����$0H��$�fA�\$��D$L�d$@M��L��DŽ$4��L�d$H��L��$H�D$?1��\$?�|$d��%��@��E1�E1���E�$�L�Ӊ�$�������$�%HDŽ$��D$T�����H�D$DŽ$��D$8�D$ �D$P�$�D$�[f.�A��\��L��$HA�yE��D�t$I���D$8H��L��I��I��L��D�t$I��L��$HH��H�E E�)H�H�|$0L��E����
I9���
H�D$@H�t$0I9NjLB�L��H)Ƹ�)�H�H9���H�D$0�H���y
L�|$@H;\$H�1
E�����D$8�)���A�Eփ���	A��?��	A��{�H	H�|$�
RA�F��D$A�����D$�D$���D$X��H�EE1���
A��#�FA���D$��D �t<�S���E1�L�|$A��H��$HfA�?I��H��H+M0H��A�O�H+U0A�W�fE�G�A��|�0H�=��D��Hc�H�>��@L��$HH���.�D$ �D$XA�����E1��D$�D$P�$H��$@A�A<+�
��$�E1ۉ|$h<?u��$�I��L��$H�D$h�;�G�<�;@����@����D���D$pE��uE����*D��@����K�sD�$$E����	�T$hT$pE����A����"D$?A����8A���6��%fA��L�{�fD�k���E�I�_���t
�H��@�s�E���t	H��$@���A����'����4��I��I)�E���?	�8�W� �H'H�
���Hc�H�>��@H�EHH��$�H��$HH�PH��$H�P��*�C��?���D$���|$M��E1ɸ^E1�E1�A�^L��$8A�H�|$0H��$4�L��$@D�mxHD�DŽ$4�t$H��$HH��$@H��PU��H��$xPAPH��$dPARASL��$�����L��$�H��@���?A��`uH�|$0� A�9)�9=H�D$0H��� ���$4����)�9���H�\$0�A�D�$$A�^U��D$PI�_fE�GL��$HM���$��$D�$$�D$P�g���A�I�_�D$PD�$$E1�L��$H�$��E����H��$HE1��PL�HH��Hc��0t+f�C��I��D�tJ�A�Hc��0u�A�����O��}�fA�QI�I��}�\!Hc�E1�0t'C�t�H��D�lr��1H��0u�A������*H�D$xE9��;$�I�ɉD$X�D�D$t�\$��������EÉD$I�_A������Es��:�]��$��D$���e	A���$�I�_�S����$���
���
��|$�L��$H������C�D�d$TDexA	ċD$�D$ �S���L�d$x�U`L��$HE1��L$L��L���^��E�$$A��E����$E���%A�����|$?��H��$�D�������T$TI�_��������	ʹ�D$tD�t$f.�D�t$�A�������1�H��$���<H��H��@�{�9����$���
����Mt �|$�L��$H�������L$T�L$����	D�c��D$ ����Dex�5���DL��$HA�q@��=weH�@$H��sUA�II�Q��u$�FfD��]t;�B@8��nH����t&����\u��B<]u��BH��H����u��E1�H���� @E���o	A��^�e	A�I��I�qH��$HE�aA��\u�A�yEI�qt۹H����€����|JI�q�@L��$HH����'�D$ A�E1��D$X�D$�D$P�$�;���f��\$�����D�$$���E�D$I�_�ljD$�����|$ A�L��$H����@L��$HH���G'A�����A��D$X����fDH�D$0I��L��$HH��t%���L��H+L$@)�H�H9��/H�D$0ʉA�Cp��$��ƒ�;�$�A�C`DT$`9�MljT$`H��$���$���A�9|�5'H��$���$0Hc�$H�L�dI����@D�t$D���A�����$���"T$?����I�_�D$���A�D�t$��fD�D$�D$ D�$$����H��$HH����V�������~H�VH���8tf�H���:H���8u�@��}tR@��,�h����r@��}t>�0�T����zH���8tH��H���:H���8u�@��}�&���fD�D$D�t$���D$XuVE1����fDL��H�|$HH��D�l$H)����H�D$H�L$H)�I)�H��L�|$@���L�|$HL�|$@����H�EA�����L��$H����H9EH�U���H�D$x�4H��$�L�1�H��$�dH34%(��HH���[]A\A]A^A_�DH�|$t2H�|$0u*H�\$H��$HH+E0�s��f�t$f��)�f��f�CH�D$�D$@����H��$HA�S���E1�L�|$fE�I��H��H+U0H��A�W�H+E0A�G�fE�W�����@��o��AD�$$L����D$X	ExE1��D$PL��$H�$�:���f.�H��$H�A��<���H��$�H��fDH��H��H��$H�:�B)�@�>�z��@���t؋t$T��������	�����f.�A��C�DH�|$0�H�D$���fD������D$ A�ĉD$�{���<]�����@��:�
�H�\$xE‰����D�d$dE�W�E��t	E����A��~
�D$TExD	Љ$E���
E1��D$pE�����H��$zH��D��UH��$XD�L$ ��$�L�@D��$���������$�AXAYD��$����D$pD�$$A��D$h���������<�>�T$hT$pE���S��A���F�D$pL�{��'�D��f��A��f�C��"D$?E���E9��=���!D�SH�{���t
�H��@�w�E)�A���l ��%fA��L��fD�o�����f�A��L��$zIc�t1��ƒ�A�<A�<D9�r�L����f�I��A��D$hL��$H��H�t$H��$HH�D$H+E0�~��f��)�f��f�F��DA���$�I�_��C�����fDH��$HL�����H9�r{H��H�xH��$H�H��tQ���H�U8��u�Lc��L)�L��H9�w�8��u�A��t���8Hu�L�L7�L��$HA�9��A��E1��-��D��$�L������H��$H���e���Hc����H��$��8T�g��$�����L��H+�$�A�9|M�g�.f��A�TfA�G�$L��$h��$��D$L��$���$�����D�SE��u	E��������D�$$�D$p
����D�G�<�5D�$$E���P����D$hE���JA����@A���v,��L1�A�fA�WD����A�WI�_D�$$A�G��fDD�$$�D$ �����G��H�D$x����A��]���Es�H��$�f�H��$�I�GH�D$X)�$�)�$��D$L�\$X�D$ A������D$p���D$hL�ۉD$PL��$�D��$fDA��~#�|$?tH��$HD�&D��A�����H�D$0H��t
H�|$XH)�H���t$8����H��$HI���PA��\����E�:�D$8H��H�pH��$HD�`E����A��]�b����D$8���V����|$ L��$�I��D��$�`�|$h���T�D$d���E���A���5�\$�����D�$$A�E�oL��$H����D$8E�I�_�D$�D$ �y�f�A��[�nA��\��L�|$x�U`A�L��$H�L$L��L���g��A��A�����A�����A����5A����KA����H��$HA��������E����D$ H�|$0D�x��A�D$����A�����*A�����2A����E&A����9A�D$����H�L$xH��$<H��$8L��������1�A����D$8��;�$8�C���D$hH�����S���$<�C�H��$H��H��$�H��H�w �
H��H��H9�u$��fDH��$HD�|$ �D$8���H��$HI��D$@�z=D� �P���\���xE�vH����H���8E�cH��$H�PI�ǀ�\t�A��
��A��
���D$8��-���t$d��t"A�����A��~�D$��@H��$HE��A�T$D��AIԉ���Mc���Hc�@���T$P��t"H�UF�,*D��D����Hc���@���D$ �����Mt A�W�D$8��-�L���fDI�GH��$HA�O��\u>A�O��E��I�W�fDH���J���E��H��$H�
H�Ѐ�\t܄�t	��]���D$8�:f�A��
�f���A��
�\����D$8����H��$H�pI��@��=��H�@$H�������PH�xI����������\t)��]�����A�N@8�t-��I���������\u�A�V��]u�A�VI����D��]u�H�MH��$@��:�79H��$H�x^H�|$ u!H���D$8H�D$ H��$H�D$pE��D+d$ E1��L�
��Ic�L��$M��H��$D��I��D��$$A��H��$(L���(fDI��H�-�A��F�,0H�l
E����'A��E��9�u�H�|$ L��H��能����u�L��$H��$D��$$H��$(A���D$��,C�H�
�H��$Hc�Hc��o,)�$��o|�B��H�Hc�)l$ Hc�)�$�������+��H��$���1�)����1��u��$�H��$��L$8H�����-H��H�� D�H����H��H9�u�I�F�D$8�D$ 
�e����L�sD��L���^����L$PHc�L����H��$H�D$h�)����H��$HA��pI��@��\������xE�����A��
tA��
u�Mt D�\$8E��������X���@��\�����xE�P���D�\$8E��������.���f.�H��$HI���P����@H���"��?�H�ۤ"�@Hc�I���D#$�A��t#H�V1�����H����?��A	�9�u�L�H��$H����f.�f�H��$�H��$�)�$�)�$�E����9H�D$xI����������D�$$�D$p�����A����A����p�|$��!	A����	A����	�D$����A�F���L��$8H�L$xH��$<L��L��������1�A���L����;�$8A�G��I����A�W���$<A�G��D$L��$H�D$ �$�D$P��@�����D�$$�������[�fDD��$��9�H�	�"�p�RA�N��rB�Hc�H�H��$@�,�f�H�
٢"D�l$d�H�H��$@E�����P�������H�
��"��?�H�H��$@�����G�<��L��H)�H��$�A��@��`�ZA����P!E����
H�M(L�EHLc�$�A���u#A�L��$�H�߾��$�D��$D��$���L�CH��L��L�����H�K�CD��$I���D$hDŽ$�L�D$pD��$��i��^f�CI�_A��D�$$E���c
Hc�$��T$hI��I)Ń�VH�|$0A�EA�U���f��M����I)�A�:]���M��D��$��"f�A�Ff����I�A�>T�R�H��D��L��L���6����t�A��4�f�H�u�P����H�XH��H�\$X�H��$H�I��H�@�u�:�c1��)��D��L�-̆+L$X1�H�l$p�A��M��L�o�L�|$hL���DH��J�l-H���UA��Lc�A9�u�H�|$XL��H���A���L�*���u�Lc�L�|$hH�l$pC�L���p�'A�1�I��L��$H��H�U�'M�aL��$HA�yH���:��-I��f.�L��$HA�	M��I��H��
u�D��D)�\$hH�|$0�,9���1�EX='��1�\$h�S;U\|�S�U\�� ��2H��$H���EXfD�E`�|$M��E1�A�E1�A�_��L��$8�E`��A�G�E`A�G�_��fDA�����L��A�����A�E���%A��I����A�W�A�G�E�o�A����"D$?�����L�{����f.�D��H���"��H��D������H��"H��BH�n�"�|�D�A9����t$PH��$H�t$h�;�f�L�HL��$H�H�Q߀�[�TH�5]u��Hc�H�>��{�U�fD��� D�SL�C���t�KL�C@�s��M�xA������$���x$�\$9�tAD��$�E���\	DŽ$�����D�l$E��xD�$$E��y�D$�$��$��D$��$��$��$���1�	ک���������D‰$���%�A��t$1�L��H��$��s����������N)H��$�f��f����fDH�D$x�����A��I�_���b��
�|$�A�L��$H�:�D$D�$$�D$ �D$P�$���f���#L�{����f���Q�H�PH��$H�H��\���xEH�p�(	�D$dA�\���l	�D$8E9���,��A��
��A��
��D�D$dE���A�����A����D$��I��E��E��D��E9�rpL��"D��D��H�5V�"����H�
ћ"A�����F�T�D�A9�t-��f��Љ����A�����F�|��9�����A9�s�A��L���L�sD��L��D�\$h�e���D�\$hHc�I�D��L���O����D$hHc�H��$HL��&�@L��1��B��f����H€:Uu�A�h�����U+���EA�9)��H��$@�|$���$<D��$8������E����D$M��DŽ$��D$ ��D�D$����A�����	A����H��$H�B<<@��<'��@��u1�<{�k�D$A��I��E�w�L��$H�D$ �$�D$P���H��$@A��\�3���M��A��X���D$D�$$DŽ$��D$ �
�DH�D$xE����D$X�D$XL��$H�������H���aE���$DD$P�$�D$DD$ E9��D$�DD$X�D$X������L�cL���q����|$PHc�H��$HL�|$h�H�fDA��I�w�A�����fDH�����<�t�M��I)�Ic�A�ʀH��$z�D�T$pD��$��ް��D��$��;D�T$pE������D$p������w.�D$8L��$H��D�\$pD�$$�,�f.�D�\$pH��D��1�UH��$X�D�L$ ��$�L�@D��$����^_��D�\$pDD\$h�D$p
D�$$D��$�D�\$hA��E����f�A����A����u��L�A�D��fA�O�������!A��L�{���"D$?����H�D$xI��A�������D$X����H��$HA�X�pI��@��\�����@A���vDŽ$�H�|$0H���� A�F�Hc�$4��H�H����H=�����&���)�9���&H�D$0�D�$$�E���+H�\$pE)�L��1�E�������H�|$0H���E����
��$4����H�A��Mc�I�Ń�H=����G&�7��)�9��6&H�D$0�H�߉H��H������DH��H��H�_H)ȃ�Hc�A��H)�A���Vf��f����f���Uf�Gf�F���fD�E� ���D����C����B�����8�����7�����6����5����+����*����)����(�p����LfA��1�A�1�fE�wA����/��$�D���t$A���E)�t$ �4$D��E���ԉt$P�|$�����A�WI�w�A�G�����A�QEωL$D����A�� M�	ElD;eh~D�ehH�E@L��$HH��u���H�H�����PA9�u�L��I��f�x
�U����I����f�DŽ$8����DŽ$<����A�9)���|$�H��$@M���q��������D�$$DŽ$��D$ �D$�y��f���$�DŽ$��������f���FI�_A��l����H�M(L�EH�L��$���$�H��H��$@�0��H��$@Ic�H�~����H��$@A�E�]��f���C�Uf�S�H��$@�bH��$@�B�l���$�L��$`DI�{I9�wH���L��躵�����$M�M��u�H�D$x�(�L��$H�(�H��$H�H��\���~Q��H�VH��$H�N��\�6���H���~�Et��D$dA�\��������D$81�A�\A��\�����@���y���L�t$x�U`H��$HA��L$L�����E�A��E���F������@�����������������"L��$H�D$8�<�fD�\$��������EÉD$�D$p���D$h�|�D$ A�I�[A�P����E�WH��L)�f��fA�G�D$L��$H�D$8D�$$�D$ ����C���f��H���C�UD�C�f�FH)��&������f�D;T$p����NA�E����H�|$0��I�_!��E�����A���
�|$?�H��$�D���b����D$8���DD��$�H��1�1�UH��$XD��D�L$ ��$�L�@D��$��G���ZY��D��$�DD\$hD�$$D��$�D�\$hA��E���i��H��$H�x\��H�pH��$HD�`E������D$8���L��$HH�EHA�'M�HH��$��>L��$HA�x<DD�A�@�PՁ��xM�h��tN��A9��9
H�ML��E1��H�����t#��D9��
��AD�u�A9���H�D$xD�$$�9H��L��I�NjD$�D$ ���DE��H��$zA��Mc�t1�A����F�4G�4D9�r�M����fDH��$HA�R�pI��@��\�p����@��#L�����f.�D��H�t$XI�$L�\$ ��A�GH)�H���m���L�\$ fo�$�fo�$�AOI�[!AW��Aǃ����E��A��Ic�H�$H��$zt�D$h1�A����B�C�D9�r��D$hH�<$L����DL��$HA�y\���D$8�&����D$PD�$$�$������D$�D$ ���D��$��D$8���H�D$x�	�%���H�D$xI����
��D���A�9)��D$PD�$$M��H��$@�$���f.�fo�$�fo�$�A_Ag�2����M�wL����$ H+�$�A�Uf��fA�G��tI�C@f�x
��H�I�C@�D$��;D$`t A�9)u�D$`��$0I��A�F�A�F���$���$���$�A�C`H��$�L�0H��$�L�H��$��0H��$��8H��$�H�������$0��)�9���H�D$x�1������{h���H�D$xL��$H�7���fDD��H�E�ڃ������H�5LeHc�H�>���D�T$dD��E���-�D$8�A��������H�d�"��?H�5��"�
�RLc�I���F#�A���tH�pE1�����H����?��A	�D9�u�L�H��$H�'���f�A��A����H��$�D��$D��$L��$�Hc�H�$�D$h��i��$���D�xD�@E��E��@��$�L�eH�E���9L�[�C^H���xL��H)��ԈSH�t$pH���C�H�$H��D��$�D��$�L�\$h�G���L�\$hM9�D��$�D��$�L��vDH�UH@A�I��f����D�E��ta���"A�V�H�EHDʈPH�EHH�PH�UHM9�w�A��H$M��A����'���L��$�D��$H��D��$������"A�V�H�EHDˆPH�EHH�PH�UHM9��_���H�<$L��$�D��$D��$H���H��H�$H�t$pD��$�H��D��$�H�L$h�.���M9�H�L$hD��$�D��$�����@1�1�����M��H�D$x�<����DA�pH�4dI�PH��0t�H���2H��0u�D9�������\$M�HL��$HA�H�\$ �$�\$P�}DA�F���IF���H��$�ʉ��<��f��D$8��\����D$D��$8�D$ E��yD�$$DŽ$��5��A�)H�hc��+��
��-��
A�Q��I�yH��$HA�QI��H���������1��I���4�L��$HA�Q��\r�A�1�0u�D9��w��-��
��+�/
H�|$0L�U(u!A�����
M���fA�z���A�]RL��H+E(A�UH��L��DŽ$4	A�GI�G	L+U(�|$�E�WfE�OL��$HA�GI�������D$�����y��E1�H���|$M��E1�H��$HE1Ҹ^A�^L��$8����A��ǀx?��@<=t<!t<<�	�|$M��E1ɸ`E1�E1�A�`L��$8���L�HL��$H�@�PÀ���<<�fH�U�>�K�H�PH��$H�x)���|$E1ɸYE1�E1�A�YE1�L��$8���H�PH��$H�P��)t,��t(H���f.���)tH��$H�H����u���d��H�D$xL��$H��u��H�MA�)A�M�qL��$HA�A���I�Af�H��$H�I��H��H�����u�L��L)�H�|$0�D$h�����fD9����� �m1�E�����L�U(����H��!���=��H���|$E1�E1�A�H��$HE1��ZA�ZL��$8����H���|$E1�E1�H��$HE1۸XE1�A�XL��$8���L�HA�)L��$H�HH��_�S���H���|$M��E1�H��$HE1Ҹ]E1�A�]L��$8�>��A�SH��$HH�t_L�JL��$H�rHc�����[H��1�fD��H��$HI��H���LN��r�Lc�B�u�@��)�+�����H�D$x�&�{��H��$�L��L�$M�w	L�\$H)�H�{H��觜��I�G�]L�$H)�L�\$��$0f��f�CA�GUfA�GI�C@���D�t$d��$�<��$� E���S���L�c�( L�������) Hc�L�H���DŽ$��D$�D$ M��E�������h���L��$8H��$<DŽ$<DŽ$8L���A�<)��<:��I�QH��$H�B���-<KwRH�6\��Hc�H�>��I��H��빃	fDI��뫃	�	��	��	@��	�߁	�Mt��H�D$x��������(�D$L��$HM��A�H�D$ �$�D$PH�E]������D��D�rD�WE9��D��D�����A�����F�T�D�A9�t5��@D��E���A��A���D��FD�\�E�A9�uE��A��A�RE9�s�A9��&A�D$�D9��_
D9��I�QD9���A�H��D��H�T$h����H�T$hLc�I���fD�t$ D��E��D)�H�|$0�D�D$ H��$H����D�L$PIc�E�C�fDE��A�T$D��A��AIԃ�A��Hc�D��E��t#H�UA���2H�ʃ�H��A���D��A��H��E9�u�E���W��D�Mt �$���|$P������<���xE�`�H���D$8���DDŽ$�D�$$E�����H�\$p1�L��������JI�_A��,���H��D�Bf����H€:Uu�D��)Љ�$�E����A�H�M(L�EHH�ߋ�$�L��$��D��$D��$I��贻��H�CHc�$�H��H��H�D$p舘��D��$D��$�D$h1Ƀ�i�L�����@D9��0��C9�����D����@A�yE�`�I�����@H��$�M��D�Qf��f�Af����H)���f��u�D$��$��$��$����D$d��$���$���$��������L�c��L���x����H�I�A�$I�\$H���\���� H�H��H�XH���C����
 H�H�H���1����/ H�H��L�cL�������_ H�I�A�$I�\$H������0H�H��H�XH�����D�|$ H��D$8H��D$hH��$H���H�D$xL�L$ ������HI�_A�����A��H��I��L��$���$�D��$D��$�褹��H�CL��H��H��H�D$p�}���E��D��$�D��$���rD�$$L������Hcщ�����K�������D$DŽ$��D$ ��������H�D$xL��$H�@���H��$�I�OH�p ��H��H���҈Q�H9�u��	�A����y������E`)؉Ã����H�|$0�i���A�H�}(��$����j���I������2��A�H��D��$H�T$h�/���H�T$hD��$H�H����L��I��雾��H�D$x�����A���p�D�$$L���o��L�JH�ML��$H�B<<�)�\$<'A�}�'DD؉\$ �$�\$P����DŽ$�A	ŋD$E��D$ ���A�GdH��$H�B<R�XL�Z<<�<'��L�RA�'�����L��$H�BH�u��@L��$HA�SH���#
I�RE1�H�=jVA������)C�L�D�lH�H��$H�I��H�R���9E��x�Hc�u�E����A�OH��$HE1�1�H+E0L�|$I��H���D$H��A�G�H��$HH+E0fE�g���A�G�L��$H���I�QH��$HA�q�0��I������o]`��DŽ$����H��$�H�H��H�� ��H����
H��H9�u�H��$HD�|$ �D$8�D$p����H��$�H��@H��H�� ��
H��H��H9�u�����H��$�H�H��H�� @�
H��H��H9�u����H��$�H��@H��H�� fD�H����
H��H9�u��K���H��$�H��H�� ��H����
H��H9�u쀌$���������H��$�H�H��L�B D�8@>H��H��I9�u����A9�����-����+��H�|$0L�U(�_�A�1��?�D$A�>�D$ �$�D$P��H��$��o��)�$��o���@)|$ )�$�H��$H��$�H��L�B H���H���� H��I9�u��>���z&�=
L�RI��E1�������A�X���H�E@�E|H������A�q�P	I��A�W��PA�W�H�H��u����H�D$xI���?���f�E��E)�H��$HE��~I�AH��$HA�A9��>L�HL��$H�8)�&H�|$0�A�`�|$M��E1�L��$@D�mxE1�A�A�`H��$4DŽ$4L��$8�R���H�� fD�
H��H��H9�u��]��A������H��$�������	f���H�����:
����u�D$d�������L�c�L��蘎���Hc�L�H��腎����H�H��H�XH���l����
H�H�H���Z����H�H��H�XH���A�����H�H�H���/���� H�H��H�XH�������. H�H�H�������0 H�H��H�XH������^ H�H�H���ٍ���` H�H��H�XH�������/H�H�H��讍���0H�H��L�cL��蕍������Hc�L�H������$�����$�D�d$����H��$�H��$�H�\$pA�D��$M��E��Hc�D��$I��H�4$D��$A��D��$E��f�H�$H�t$pL��H�]H���I��I9�s>H�EH�A�$I��f��D��0A�T$�H�EHD�PH�EHH��H�EHL9�w�L$A��I��D;�$|�E��H��$�M��D��$D��$H�\$pD��$D��$�4��H�U�
��L�HH�D$x��#��fDD�T$X1ɉ�H��E����1�解�����^H�U(I�GLc�H�MHH��H)�I�H��H���H�UHH+E(�BH�EH��H���|$E1�E1�A�H��$HE1��[A�[L��$8�ɹ����$������H�|$0���D$h�}XH�]P��~zHct$hE1�L��$�D��$�E��I�܉�H��I����H�H�D$pfDI�t$L��L���P�����uH�D$pA�<�cHcE\A��I�D9�u�L��$�D��$�D�T$X1ɋT$hL��H��E�����p����Å��%E���ML�U(�e������$H�\$xE‰���A���Q�E1�<>H�MA�)A����A��7��L�uP�EXHc\$hL��$�D$XH��$���~�CL��$E1�I��H��D$pH��$��D$%��$��QH��$�A�<uZA��u`f���V��9�t��$�����D$pHcE\A��I�D;l$X��I�^L��L��H���m�����t�yҋT$XHcE\L��L��$D)�I�<��Hc��Ҋ��D�\$pE����H�EP�}`H��$�EX�O�D$X��~@H��$�|$X1�f�I9�t�f����9��<��HcE\H�9��I�^��H��$L��H��A��E`��A�F�����D$h��H�A��EX���DŽ$�������D$�D$ ��H������D$h�1����$<�|$�$8��!�<)�*I��M���^E1�L��$HE1�E1�A�^霶��A�o1�I��L��$H���H��$�1��f����� t'H����p�
��t��u��߈
����È
��D�d$dE���S���L�c�L�������' Hc�L�H������) H�H�L�c�L���I����D$A��D$ �$�D$P�A�L��L��$�D��$��f�������H�D$xL��$H��l���H�D$xL��$H��T���A�R���D�$$����L�RA�>�����L��$H�B�����E�����E`��D�D)����-D…��;Ed�f��A�fA�GA�`�|$M��1�D�mxE1�E1�A�`L��$@L��$8DŽ$4�	���H�D$xL��$H�醼��H�D$x��v���@��)���H�D$x�'�\���H�D$xL��$H��D����HՁ���L��$HL�R�ZE1��B��H�D$x�;�	����E`�t$pL��$�H�����������H�D$xL��$H�A�һ��M��E1�E1��M���L��$I�^�E`�H���H��A�����H��$HD��A�GfL��$HA�CM�S�`�H�D$x�6�m����׉�A���2��H�E(H��I9��_�D$1��t��A�I����A�G�L��$H��1ۉ|$%��������$�1ۉ�$�����%�D$T餯��H�D$xL��$H��ۺ��H�\$xI����Ⱥ��H�D$xL��$H�*鰺��H�D$xL��$H�1阺��M��H�D$xM���>邺��H�D$x�0�r���H�D$x�*�b����EXH�]P�D$h����E1�Ic�D��$�M��D��$�I��D��E��H�D$pL��$�IcE\��H�;l$h��H�T$pH�sL���,�����u��L��A�GA�fA�G���H�D$x��ʹ��H�D$xL��$H�+鲹��H�D$xL��$H�:隹��H�D$xL��$H�0邹��M��L��L��$E��D��$�D��$�D�L$X1�D��L��H��L�\$pE��D�D$h��L�T$X�ښ������f��A�GA�fA�G�;���I��H�D$x�����H�\$xI�����H�D$0H��t	�8������}p1ۉ|$逭��H�D$xM���鼸��H�D$x�)鬸��M��E1�I�Ӄ��~�M�����H��$�1��H��$��I�GH�D$X���D�t$����L�T$XD�D$hL�\$pE�������A�{R��A��tWE��~DD9md|%fA��A�fE�o�>���H�D$x�:�	����H�\$xL��$H���A��������޺H�5RL��������u�A�GhA������H�
1E�A�4H���1�����Ek�
F�d&�A9��E��uA���E��A�GfA�fA��fE�_�����H�5}D��
�����I�GH�D$X鏽���Ɂ���|$dD������1�H���D$8�������AWAVAUATUSH��H�$H��(H�T$H�L$L�D$dH�%(H��$1����D$\%�D$h�D$H�|$xH����	H�D$I����M��H�H�D$H��t�H�|$�:
H�D$�M����I����L��$�H��$�I��H��$�I��@H��$�� ��	L�[PL�
ZPL�ePL�\PL�=IPf.�Hc�M�,A�}(�A�|*�
I�D�L��H������������H��L����������w�H��L����������t�H��L����������i�H��L����������v�H��H�=�O����������H��H�=lO���������H�ƹH�=ZO�����u&���Hcځ��M�,	�A�}(���@D�\$E������%�=���=������H�D$xH�\$�D$h8A�8L)�D�Ǻo�nH��B�f����QH��H�H@��u������
���PH����f������2���f��������	�������� ��fD���0��fD��%p= ��~D=@�=P�d=0����H�A�
H��$fD��$�5����=���DŽ$�
DŽ$��$L��$L��HDŽ$�HDŽ$�HDŽ$�HDŽ$�L��$�L��$�L��$�L��$��k}����$����L�D$hH�L$xL��H�T$pE1�H��$�HDŽ$�HDŽ$�L�|$pƄ$^L�D$0H�L$@H�T$8H\$xH��H�D$dPH��$�H�D$0PjH��$�H�D$HPH��$�PH��jj�
���D��$�H��@�D$xD)�E����HcT$\������$�����$�H�H�T0H�Ee"H��H�T$H�H�T$HH��H�����P1�E1���ERCP��$��0L;5e"��$�L�l$x�E��$�LD�H����$�f�uE1�f�E��$�L�u D��f�E��A��f�}f�UfD�E�EH�H�E(��$�H�M0L�4HDŽ$DŽ$�DŽ$�H��$�L��$�L��$�DŽ$HDŽ$�L�t$xA�^�}j�t$0��j���t$HSjjL�D$pH��$�H�T$x�{�����$ f�E��$(f�E��$4H��@D��$�f�EE��t�D$d�����L$h��uH�D$x�8t�D$hH�D$pH�PH�T$p�H�D$pHcT$\L)�H9��9�D$h�T$1ۅ��É\$�L)�H��H��A�U��D$h��u[H��$�L9��
H�B��t$L��H��$��B��R���	�H�M�,I�\A�U���	��{��H��u��D$h5��$�����t$�����L��A�������z��I��H��tr�\$L�l$ fDfA�yuEA�A�A�Q�L��L���	кH�M�T�E�A��u证��E����af��fA�AI�yD�����]z��I��H��u��D$h���J�E��s�D$d����f�H��$dH3%(H����H��([]A\A]A^A_�@���@����� �����L������z��H�|$�������D$h,A�,��������������
�P�N���L�5�a"���DŽ$���DŽ$����H�
�HH�D$H�H�D$H��tD�1������D$h������$�]f9]���D$h�%��������$E‰D$hH�pa"H���D$xD�D$hD)�H�\$H�
+<�E���m����#���L�L$l1�L���D$l��$�L����������M�D$d���f����Eu@���tH��$���H���4�р�9�D�f�Uf�M�/����������l���f��������H�D$�D$hA������H������H�D$1��c����DŽ$�
�Z����D$`��xM��tH��$���H���9�u��f�Ef�M�"���H�D$xH�\$�D$hA�L)����1�L��L�������D$`��y���$�1�L�����������f�M�����D$hA����H�D$xH�\$�D$hA�L)�����v����M��I��H��1��,v��f.�f����������w#H�cF��Hc�H�>���H����1��fD�1����
1����1����1��f.�DAWAVAUATUSH��H�$H��H�$H��H��$� H�T$XI��H�w ��$�A�UH�|$(��$� L��$�H�\$hdH�%(H��$x 1�H�GH�t$xH�sH�t$`H�D$PH�GH�D$@H�H�D$ �G0%�D$H��$� ���D$t��$� ��H�H��H��$��D$H�H��H�_H�D$8����b��	�<H�H��A�|\��|$t��<H�t$8L�D$ ��|$1҃���@DL���FH��L)‰N��D�ډF�A�Ef����IŸA�}T��D�Z9�|��D$L��H��$x dH3<%(�D$L�uMH�Ĉ []A\A]A^A_�H�t$(�F.�X���f�A�Ef����I�A�}Tt�H�D$hHcX�(I�ۅ��%H��H�t$`H�|$8�$H��D�L$�vt��D�$D�L$�L��1��Bf����9�L��Bf����H€:Tt�D�d$HE����H�|$XHc�H��H)�H��H�\$PH9���H��H)؉�H�H)�H�|$XH�D$(H�|$XH9xvH�xH�T$8L�D$ E1�D�T$�A�Ef����9�|3L��A�[L)ǃ�E9���������BA��H���z�)ljJ��z�A�Ef����I�A�}Tt�Ic��Ic�D�T$H�D$L����L��$�E���D$s�D$sA��D��$���$�H�D$xH��@HH��$�H�D$XH��$�H�D$1�H�|$h��H�t$@H���D$0����H\$8�H�D$D�_�D$H9�s.�0D�L$H�D$��t$0E��t������E�������E1�E��L�t$`E1�D���D$4I��L�T$8DŽ$��=��B�����M;l$�K����R��A�A��E�n��I��A�V�A�F�E9�~RIc�H��LҋD�j��x�E��t{A�O�L��H��I�L
�f�H��H9�tW9u�z9xu�A��E9������IHcD$H�T$8Hc�H�|$`HD$I��H�D$hH�T$`H�|$8����f.�Hc�HD$ D�D$H�D$��$E��uH�
�H�<�DL$4�L$4H�5CI���tSHD$�|$HA��0��t���wd�<$8~5�F�w-H�
�@Hc�H�>���D$L��
���������E1��$=�w�H�=�@Hc�H�>��D�ۉ���H�=�X"��?D�H�=Y"C�IB#4�M�����E��t$E1�f.��x��H����?��	�A9�u�A���E������y:L�D$PH�T$XI9��f:1�H�B�I9�s4�R�����t�#fD�������#H��I9�u�H�D$P�wH�D$X�����$|�
����$h����$T���$,����$@����H��W"��?�H�X"�R#4�H��ɉ���A���D$0��t&H�D$1�H�P���H����?��A	�9�u�D�D$0�G�D$����Hc�$�HD$PH9D$��#A������H�t$H9t$P��$A�������<$�t*A�D$D;d$�����KE�kA��I��A�K�A�C��D$����'�D$0=��!C�Y.��
������1Ƀ���9���4A������A�T$D;d$�����D�L$�KE�kI�{A�A�CE����/�L$0�����:�T!�L$0��( �����Ƀ�@��@��9��v"�$����MAD�LE�;l$�#����E�nA����A�F��I��A�F�A��A�F������<$�t*A�D$D;d$�����KE�kA��I��A�K�A�C��D$����&�D$0��
�W?��-1Ƀ�
��-�<$�uA��I��H�D$�R�u�@��f����9���9;l$�\��������E�nA����)�A�FI��A�F�A�N������A�t$D;d$�����SA�CI�KA��T$E�k���z.�|$0��
�G4�e 1҃�
� �<$����}AD�ID�;l$������A�FA����E�n��I��A�F�A��I��A�V��c���A�L$D;d$����A�DD�D$E�kI�{��$�A�A�CE����'A�����A��tP����9�|$s��9L�
`T"���A�A��A����D�L�
�T"E�AH�T"F�D�A�E��D�D$0D9���E9�@��	�<$+@��@8���.A��A��I������R��~,A�D$D;d$�����A�LE�kA��I��A�K�A�C��D$����$�����A��tH���B>�|$s�7>H�
�S"����������H�
�S"�AH�ES"�|�����|$09���9���	ȃ<$+��8��3A������A�T$D;d$�����A�DA�CI�KA��D$E�k���R#�����A��tK����;�|$s��;H�=�R"�����������H�=S"�<GH��R"D�L�A�D��D�L$0D9���D9�@��	�<$+@��@8�� 4A��A��I������H�t$�Ff�$f������$��F�$<S���$�B����MD��h�3#H�D$��f�@f��$�f�����M4f����D��$� A�D$���)7D;d$����E�kA��I��A�[�A�C�A���n���A�T$D;d$���H�|$��E�kI��A�[�A�C��Gf�$f����H��8T��A��9T$�E��$��KH��H+T$ E�kI����A�C�A�S��Pf����HЀ8T��*A������H�T$A�L$D;d$���H��H+D$ E�kI����A�C�A��A�C��Bf����H€:Tt�A���}���H�T$L��$�H��A�L�T$H��H+L$X��$� P��$�PAUh�H��$�PH�t$8H�|$XL��$`���H��0L�$L��$����������2�D$L�,�H�D$�Xf��$�f����HÀ;Tu�Cf����HÀ;Tt�H�T$L��$�H��A�L��$�H��H+L$X��$� P��$�PAUh�H��$�PH�t$8H�|$XL��$`� �H��0����w����$��L��$�L��$����X��8�t,D;d$�X�H�CE�kA��I��H+D$ A�C�A�C�A������H�t$H9�$���!A�D$D;d$��S�<$UA�CA�E�k�8A��9D$���H�D$I�� A���@A�C�E�k�f����)�A�[���D$���"�D$0=�%��$�� ��$��&�D$0=���$=���$�E;l$�[��E�nA��I��A�^��A�F��
�t$����!�D$0��
�a7�m%��
�$�E;l$���E�nA��I��A�^��A�F���D$���!�|$0H�N"�������H�=N"���H�|$�BH��M"�H����2�WH�
c<Hc�H�>��D$�����E;l$�g��E�nA��I��A�^��A�F���D$���MH�D$(�p4H�P���S.H;T$�'2�E;l$�
��E�nA��I��A�^��A�F���|$���d �|$0��V �D$0Hc$H�
<H��$��"H�
�;:�E1�E;l$�����E�nA��I��A�^��A�F��H�t$�����D$0=�w*Hc$H��$�H�
�;�"H�
�;:�7�E;l$�&���E�nA��I��A�^��A�F����H�D$H9D$Ps?H�p�H�D$(H9pvH�pH�D$�L$H�@��…���H��$�����u1ҋD$��t�D$0=�wH��$��у��Eу��<$��8��#A�D$D;d$�n���E�kA��I��A�[�A��A�C���<$�t*A�D$D;d$�2��KE�kA��I��A�K�A�C��D$����D$0=�Q-�d#�� �B-��#�|$0����t1������1Ƀ���9��8+A����B��~*A�T$D;d$����KE�kA��I��A�K�A�C��T$����|$0�����1�:!�W�������1Ƀ���9���&A����R��~*A�D$D;d$�$��KE�kA��I��A�K�A�C�D�L$E���o�D$0=�6.���� �'.���|$0����t1������1Ƀ���9��c)A���w�1����A�L$D;d$���D�D$�CE�kI�{A�A�CE����"�D$0=�:*�I�D$0=/ �%*��D�L$0�A��_ t1�A��0����@��@��9��*�$�u-���AD�LE�;l$����E�nA����A�F��I��A�F�A��A�F���1�����H�t$H9t$P�aA����H�t$H9t$@��H�D$(�p4H�P����,H;T$�7-A���3�H�D$�PA�D$D;d$�F���A�SA��I��A�[�A��A�C���H�t$H9t$@��A�D$D;d$����E�kA��I��A�[�A��A�C���D�L$E���H�D$(�p4H�P����+H;T$��5A���~��|$�����t$0H��G"H�
H"H�=�G"���H��$���H��$��H��$����H�5bG"�AH��$��D��<��a)LcD$E1�LD$L9D$@��&D�<$�[H��$��Љ�����H��$�����FH��$��D�H��$��<��T&Hc�A��I�L9D$@�?&A���Ё��v�H�
�F"��?�4H��F"�v#�H�����tI�pE1����H����?��	�A9�u���G����T$���q�D$0=�t�
��
����E;l$�-���E�nA��I��A�^��A�F�����L$���
�D$0=��/���
����E;l$�����E�nA��I��A�^��A�F����D$����D$0=������ ���q�D$0=��k=��`A���0��D$������A��tH��$�����|$09��A)9��9)�E;l$��A�TE�nA����A�I��A�F�����D$���)D�L$HE���T!;t$0��0�D$0���0H��$��9���/A���|��D$���*;t$0� �E;l$���A�TE�nA����A�I��A�F��6�H�D$(�@-�D�D$E�����@0 �g*H�D$(�p4H�P���0*H;T$��5A�����A�L$D;d$���D$A�|E�kI�SA�;A�C���0A�����A��tO���J+�|$s�?+L��C"���A�A��A����D�L�D"E�@H��C"B�\��A�؋\$09���A9�@��	�<$+@��@8��Q$A��A��I���&��<$mt*A�D$D;d$�9��KE�kA��I��A�K�A�C��L$���6�|$0H�
=C"�������H�
zC"���H�|$�AH��B"H���O���./�W���/���;H�|$�@88����1Ƀ���9���!A���`��A�T$D;d$�w�D�D$�CE�kI�{A�A�CE���ED�L$0L�|B"D��A����A�L��B"��D�L�L$E�@H�'B"A�yJ����%E�AL�
�0Oc�M�>A��R��~*A�D$D;d$����KE�kA��I��A�K�A�C�D�D$E���l�|$0H�
�A"�������H�
B"���H�|$�AH��A"H���O���q(�$����-���H�|$�@88����1Ƀ���9���A����1����H�D$H�T$L��$0L��$�L�$H��A���pL��$�H��H+L$Xf����Ht$(��$� ��P��$�PAUh�H��$�PH�|$X�?�H��0����1L��$�L��$��E����Z�A���R��$��P�_H�|$�t$L�G!���r�|$0��"��O����A�0L��H+|$ ����F@����'H�=%/@��Hc4�H�>��|$���{�B�|$0��$���������	��A����E�L$D;d$����C�L$A�C��$�A�I�CE�kH��$�����B�|$0���$����������	� A��E��L��$��*�E�L$D;d$�F��CA�CI�[�D$A��D$E�k�����|$0��:���1���	��A��E��I�����D�JE��~*A�D$D;d$����SE�kA��I��A�S�A�C��D$���]�|$0��P���G���	��A���W�E�L$D;d$�s��CD�D$A�CA�I�CE�kH�D$E���1�|$0����������	��A��E��L�\$���D�L$E���������A��tH����%�|$s��%H�
>"����������H�
M>"�AH��="�|�����|$09���9���	ȃ<$+��8��A���a�H�T$�Bf����H€:Tt�A�D$D;d$�e�H+T$ E�kA��A��A�I��A�C���H�W="H�H����H�t$H�|$PDŽ$�L��$��FH��$L��$���$�H��$�H��$�H�D$@H)���$H�D$XH)���$H�D$H)�H��$���$�Ff�$f������$(�Ff������$,H�����H��$H�D$(H�@@H��$ ��L��$�L��$������KA�T$D;d$�6��E�kA��I��A��H�]<"A�C��@S�A�[�����<$�t*A�D$D;d$�����KE�kA��I��A�K�A�C��D$�����t$0H�
�;"L�
=<"H�=<"���H��$���H��$��L��$����H�5�;"A�AH��$��D��<���HcD$H�t$�<$�L�uA��I���$L9L$@��D��$��p���H��H��$�I�������HcɃ��)�H��$�H��$����H��FH��$��D��<��zMc��$M�L9L$@�eA�A���=��y���H�5�:"��?�<H�5�:"�#�I�����tI�yE1��7��H����?��	�A9�u�A���-����D$A�T$D;d$�?���t$�CE�kI�KA�A�C���1�|$0H�5G:"L��:"L�
a:"����H��$���L��$��H�5�9"L��$���H��$��A�@�D�A�<����$LcL$LL$-���DE�LE�1���L9L$@�vD�<$A��L��$�L��$�L��$�L��$���$�H��$���$�H��$��S���P��I�������Hc҃��)���H��DEA�D�A�<���Hc�A��I�L9L$@��A����=�~�H�
�8"��?�4H�/9"�v#�H�����t$I�qE1�����H����?��	�A9�u���I���A���X��H�t$�Ff�$f����H�T�:Tu�Bf����H€:Tt�A�L$D;d$�C��H��H+D$ E�kA����A�CA��I��A�C�������$�A������A�L$D;d$����H�|$��A�CA�E�k�Gf�$f����H�T�:Tu�Bf����H€:Tt�A��9L$����H��H+D$ E�kA����A�CI�� A�C��R���R��~*A�D$D;d$�g���KE�kA��I��A�K�A�C��L$���R�t$0��
�� ��1���
����~�<$�uA��I�����M;l$������A�VA��I��A�^��E�n�A�F�����B�D$��~*A�D$D;d$�����SE�kA��I��A�S�A�C��|$���z�|$0H��6"H�
7"H�5�6"����H��$���H��$��H��$����H�=`6"�AH��$��D��<��M�t$LcL$LL$��~�<$�uA��I���$L9L$@w|�F����P��H��$�I�������Hc҃��)�H��$����H��FH��$��D�H��$��<���Hc��$I�L9L$@��A����=��z���H�
�5"��?�4H��5"�v#�H�����tI�qE1����H����?��	�A9�u���/���1�����D$������
����������� ����D�L$0�A������1�A�������rbH�|$(�,���|$�������A��A��I���w��H�ƒ�I9�s9��=��H�T$X�������|$0���t�r�������v�A��A��I���/��- �������.���	����� �D��L��$�D�<$L��$���$���$��U;l$����D$E�nA��I��A�F����A�~���A�F����A��A��I�����A��A��I�����A�D$D;d$������E�kA��I��A�[�A��A�C��`��A�D$D;d$�|����E�kA��I��A�[�A��A�C��-��A��A���!��A���KH�D$(�p4H�P���H;T$�O$A������H����Ѓ�<�t���=��/�H�
3"��?H�=Q3"D�C�@B#����E��tH��1����H����?��	�9�u�=��������A�D$D;d$������E�kA��I��A�[�A��A�C��D��H��2"H�H����H�|$H�t$PDŽ$�L��$��GH��$L��$���$�H��$�H��$�H�D$@H)�$H�D$XH)�$H�D$H)�$�Gf������$(�GH��$�f�$f������$,H�����H��$H�D$(H�@@H��$ ��L��$�L��$����N���H��1"�@SHD$H�|$�G�$�4��A������D�H�<$A�����D�KHc���A)�H��H��H�H��(H)�H�$HcH�\$PHcwH��H�H��H�)�H9�sH���P���������)�H9�u���"�E9l$�(��E���I��A�F�E�n�A�N�H��H;<$u����$4��$0H�t$��)��Ff����H��<Tt�I���VL+L$ H��E�A�����<w�CD��f����)ƅ��D;d$��E�A��I��A��A�C�E�k�����H�D$�L$�xA��f�<$fA��E��I�����A�D��+L$ ��F<�H�5���Hc�H�>��A�����A�����A��A��I���t��A��E��I���e��A��E��I���V��A��A��I���G��A���>��L�L$E�ID8H������@��@��9��A��A��I�����A�����A�����A������A��E��I������A������A������A������A�����A�����A�����D;d$������$�E�kA��I��A�C��DA�C����A���l��A���c��A���Z��A���Q�������L�L$E�ID8��������A���&��A�����A�����H�D$(�@,����A�D$D;d$�!����E�kA��I��A�[�A��A�C�����A������A�����A�����A�����D�HL�+."H�D$�@C9������`���D�@A�@�<��A��A��D	����?���H�\$H9\$Xr1H�D$(�@,�����tH�D$(Hc@(HD$PH9D$X���|$L���D$L�������D$L�;�$�����H��$�H�L�\$H��L�$H�~�AE��L�\$L�$��$�~"H�|$PH�D$XH��$�H)��H�D$H)��FH�D$(�@.����A�����A�����A���y��A��A��I���j��A���a��A���X��A���O��A���F��A���=��1�A�p�Rf����9�*A�t$D;d$�D���yE�kA��I��A�{�A�C�����A�@���uf������t9���;l$����A�A��I����A�V�E�n����1�A�T$D;d$������A�CI��A�K�E�k����H�E;l$����A�A��I����A�F�A��E�n��U��1��R��~*A�t$D;d$�h����E�kA��I��A�K�A�C��������E;l$�6��A�A��I����A�V�E�n�����1�A�T$D;d$�
����A�CI��A�K�E�k������E;l$����A�A��I����A�F�A��E�n����H�t$�~@8x����1҃<$��9��	A���d���HH��*"H�D$�@9��������P�B�<������	���뮸�H�t$�~@88����듃�	�����s��D$0=/ �i���|$0���_ �I�1���0���9��aH�D$(�@.�����A�����@�D$0-( �����A������D$0=/ tv�D$0=_ t=0�-��A���`��- ��
����������u��H�A�<������	����������u��H�A�<������	�������|$0����������������	����E;l$������E�nA��I��A�^��A�F�����D$0=/ t����D$0=_ t�=0�J�먋D$0=������r-( �������A���Y���D$0-( ���������r7H�D$(�@,�D$%��2�A��� ���D$0=�t�r
-( ��v�A������$-n��EAD�LE�;l$����A�FA��A���I��A��E�n����r=H�D$(�@,����$�����2��A������D$0=�t�r
-( ��v�A���u����	�0������D$0=/ ��	v3�|$0���_ ����1���0�������	�������- �������p���D$0-( �����A�����- �������R��- ��
������A��A��I�����A��A��I�����E��~�<$CuA��I��A���E;l$����A�A��I����E�N�E�n��s��A��A��I���d��A��A��I���U��A��A��I���F��A��D9��*���`��DH�D$�$�M�@��f����9��
;l$�0����E�nA��I��A�^��A�F�����A��A��I�������|$0��p�����D$0H��$�H�*���"2H�
:2�a���A�����A�D$D9d$�	E�A��I��A�C�E�k����A�D$D;d$�x����E�kA��I��A�[�A��A�C��)��D��$�H�D$�R�M�@��f����9���;l$�"�������E�nA����)�A�FI��A�F�$A�F������$��+����8��I��H�D$�R�u�@��f����9���;l$������$�E�n��A��A�FA��I��A�F��]����~�<$ouA��I�����E;l$�d��A�A��I����A�V�E�n�����$��$��M��EH�D$LE�$�ED̃��@f����9���;l$�����$�E�n��E��A�FA��I��A�F�����|$0�����Q�D$0H��$�H�
���"2H��:2�R���A��E��L��$��\��H�|$x�D$0���47@84��A���:����~�<$�uA��I�����U;l$�A����A�FA��I��A�^��E�n�A�F������$��D�EED�ID�;l$����t$E�nA��I��A�F���E��I��A�v�����|$0��h���m�D$0H��$�H����"2H��:2�y���A��E��I���Z���$��BLE\$�EED�L�\$;l$�d��A�A��I����A�F�E��L�\$E�n�����|$0�������D$0H��$�H�i���"2H�I:2�u���A��E��L�\$����<$�uA��I��H�D$�R�M�@��f����9���
;l$���������E�nA����)�A�FI��A�F�A�F��T���E;l$�s����E�nA��I��A�^��A�F��%���|$0������	�D$0H��$�H�|���"2H�\:2�1���A�������<$quA��I��H�D$�R�M�@��f����9��~;l$������E�nA��I��A�^��A�F��{��L�L$1�I�yH9|$@����1�A�y
������T$0D�<$�E;l$�m����A�FA����A�I��E�n�E�N������~�<$�uA��I�����E;l$�$����A�VA��I��A�^��E�n�A�F�������~�$��)�	��6�	�B�U;l$����A�A��I����A�F�E�n�����D$�U��;l$����A�F�$��I��A�^�A����E�n�A�F��X���<$�uA��I��H�D$�R�M�@��f����9��~;l$�H�������E�nA����)�A�FI��A�F�A�F����������$��*t��7DE�LEڍE;l$��A�>A��I����A�F�E�n����H�|$(H�t$HcO8H)�H9������<@8>������tH��H�t$(�v=@8p�v��A���[��H�D$�R�M�@��f����9���;l$�\���A�DE�nA����A�I��A�F��
���$��(t��5DE�LEٍE;l$����A�A��I����A�F�E�n�����A������1����A�����A�����A������|$0H�t$���������������f�������A���m��H�t$��H��H��:TH�T$u�Bf����H€:Tt�H�T$H�T$L��$�H��A��L��$�H��H+L$X��$� P��$�PAUh�H��$�PH�|$XL��$`蛾��H��0��������$A�L$L��$�L��$����X���8��aD;d$�ɿ��H�D$H��H+D$ A�A��I��A�C�E�k��������i��;l$������$��A�VA��A���I��E�n�A�F��=��;l$�_���A�A��I����A�V�E��E�n����1��<$�����;l$�*���A�A��I����A�V�E�n����H�D$(HcH8H��H)�H9T$�x
A�����A�����H�D$(Hc@8H��H�H9T$�%A�������`�������;l$�������A�VA��I��A�^�E�n�A�N��V���;l$�x���A�A��I����A�V�E�n��2���A���)���;l$�K���A�A��I����A�V�E�n�����H�D$(H�|$L�\$D��$�L�$H�H8�
3��L�$L�\$�����A������D��$�H�|$H�H8L�\$L�$�a3��L�$L�\$���3������A���D$���1��<$����D;d$�������$�E�kA��I��A�C��T;A�S�����L��$���E��c��H�D$(�p4H�P��t~H;T$���D��$�H�|$H�H8L��$�L��$��2��L��$�L��$������A���˿��H�D$(HcH8H��H)�H9T$��A��騿��A��響��H�t$(H�|$HcN8H)�H��H9��S��v<@87�F����H�t$(H�D$�~=@8x�%�A���K���@D;d$�h�����$��D�����$��D$L�����C��D$L�	���$��D$L�*���$�������D$L�1��xH�
R"H�D$�@9������������;l$�ʻ����A�VA��I��A�^��E�n�A�F��y�����H�|$(H�t$H��<@8<��	A���T���H��$���D�����A���7���H��$�����������	�E;l$�;���A�>A��I����A�F�E�n���H�|$(H�t$HcO8H)�H9�����<@8>�����tH��H�t$(�v=@8p���A�D$D;d$�˺����E�kA��I��A�[�A��A�C��|���H�D$(�p4H�P���1H;T$���H�|$D��$�H�H8L��$�L��$�D�L$��/��D�L$L��$���L��$��d�A������D;l$�)�����A�VA��I��A�^��E�n�A�F��ؼ��A��A���̼��A���ü��I��A��A��鳼��A��I�����H��$����<�A��H�t$�EH�VH9T$@v
�~
�	;l$�������E�nA��I��A�^��A�F��J���H�|$1�H�OH9L$@�=��1��
���/��H�t$1�H�FH9D$@�����1ɀ~
�����A��E9�|j��uf�D$Hc�HT$���m���E�I��A�F�E�n�����A�2M�ZE1��A�BA�E�j�D$阻��A��鏻��H�\$PHc�Hc�H�H�H9�sH���x���@���@��@��)�H9�u�E;l$����A�؃�A�FE�E�nA�V���9D$�������E�n��I�� A�v�A�F�A�V�����H��$�������A�����;l$����A�A��I����A�V�A��E�n�龺��D����E;l$�շ��A�TE�nA����A�I��A�F�醺���|$0H��"������������H�"�BH��"|������H�D$(�p4H�P���2H;T$��D��$�H�|$H�H8L��$�L��$�D��$��,��D��$�L��$���L��$����A��E��L��$��ʹ���E;l$���A�TE�nA����A�I��A�F�闹������H�|$�@8x��������xH�
�"H�D$�@9���������H�|$�@8x����� ��A��A���/���A���&���H���|$0L��$�H��L��$�H�T$L�$��*��H�T$L�$L��$�L��$�����H�D$(�p4H�P����H;T$�f�D��$�H�|$H�H8L��$�L��$�D��$��K+��D��$�L��$���L��$���A��E��L�\$�a���f.��E;l$�v�����E�nA��I��A�^��A�F��(���H�D$(�p4H�P���uH;T$��D��$�H�|$H�H8L��$�L��$�D��$��*��D��$�L��$���L��$����A��E��I��鬷��H�D$(H�|$L�\$D��$�L�$H�H8�A*��L�$L�\$����H�D$(Hc@8H�|$@H)�H9|$����A���R���H�|$(H�t$�<@8>tA���6���A��E��I���'�����t�H�|$(H�t$�=@8~t�A������H�t$(H�|$HcN8H)�H��H9�����v<@87�������H�t$(H�D$�~=@8x���A��E��L��$�馶���H�t$(H�|$HcN8H)�H��H9�����v<@87�����t;H�|$(H�D$�w=@8p�h�A��E��I���I���A��E��L��$��5���A��E��I���&���;l$�H�����E�nA��I��A�N�A�F����A����f.��|$t�����D$4����H�D$h��$�9X��H�D$(�@,�u�Ā�ز���|$L��Ͳ��H�D$H�t$@H9������H9D$X�������$��D$L������H�D$(H�\$PH��$�H�@H)؉H��H)؉G�p�����&��H�t$(H�|$HcN8H)�H��H9��i��v<@87�\���t!H�t$(H�D$�~=@8x�?�A�����A���޴������Ӿ�������H�|$(H�t$�=@8|���A��驴��D��$�H�|$H�H8L�\$L�$�C'��L�$L�\$�������|��A���k���@A���^���A���U���A���L���DH�D$(H�|$L�\$D��$�L�$H�H8��&��L�$L�\$��u	A���
���A�����H�D$(H�t$@Hc@8H)�H9t$���A���޳��H�|$(H�t$�<@8>tA���³��A��鹳����t�H�t$(H�D$�v=@8pt�A��闳���D$L��龰��A��E��L��$��v����;l$�������A�FA����A�I��E�n�A�F��=����D$��+���I����� ���A��E��L�\$�����H�t$(H�|$HcN8H)�H��H9�����v<@87�����t:H�|$(H�D$�=@8x�f�A��E��L�\$鯲���A��鞲��A��E��L�\$鍲��fD��AWAVAUA��ATUSH��H��(H��$`H��$pD�L$D�D$(H�L$H�|$ dH�%(H��$1�A��oZ�D�L$h�,H��H����H������H����H��u��$h������$h�����$x��HDŽ$H�C H��$�H���H�D$0H��������tH�VH��$�tH�F H��$��;ERCPI���lHcD$(�|$Mc�N�dL�|I��H�D$`��I�G�H�D$8A�FH��$��Ɖ�L��$����t$@�ƃ���@��	�A�N�t$\A�v�D$Y��A�vD��$���$���$Hc�H�H�H��$�������L$%p��pE�= �]
vO=@��=P��	=0��	���H��$dH3%(�b
H��([]A\A]A^A_��
=u�DŽ$�
DŽ$��$�������N���	���$��@���L�d$0L��$�H�t$pH��L��L���s!��I��H����	M��MD�L�|$0�W���H�NH�L$0�����;ERCPI��H�D$0�1����DŽ$�\$��t�D$ ��H��$�H�D$PH���HA�F�Ã�f�\$ZA�^���\$��u$��
f�|$ZuH�\$0H��t
�C���D$D�����H�D$H������\$l�\$@�t*A�V����H�؉\$@H�\$P���T$h���D$l�D$M��H��$�M��%�D$,fDD�\$,E���jD�T$M��E����H��$�D�L$M��H��E����I9�r9�nfDHc�$H��H)�H��I9�w��$A8<$��I��L9�v7��$��t�E1�H�K8L���� ����uH��$�I��L9�w���D$�Z�����D�D$DE���3M9��:A�E9�u�,@A�E9��I��M9�u�f.��D$��RH�|$0H��t�GtL��L)�9G(w|�L$@����*L��L)�H=��1������L�H9D$8��t$h����I9�v6�H�pH�t$89���H�D$8�t$@��H�P9���H��I9�u�����s����I��I9������$��uiHc�$H��H)�H��I9�w��$A84$��I�D$I9�v�A�L$����t�������u�H��I9�u�M����H��H�K8A�L��������S���H��$��H�D$8@L��$�H��L��H��jjA�F��P��$�P�t$HD��$�L�D$H�L$XH��$訧��H��0����m����|$Y�b����T$��t(��$H��$�����I9��W��D$M�l$���M9������A�}�
����M9�����A�}
����A�F �������$����v��$�����D�\$,I��E�������DM����f�|$Z��H��$�H�D$`H�I9�vw�|$����M9�r$�d@L9��I��M9��B���H��$���$��u�Hc�$H�H��I9�r͉���$��Hc�A8Lu�����DM����M9����A�D$��<�t����DA�E��<������I��M9�u�M���������?�����$
A8D$����'������������$
A8D$�S�������Hc�$H)�I9��4�����$A84$�"�����������$
A8D$����������.���@M9�����A�EH�T$P�9�u���A�E�9�����I��M9�u����D�D$\H�K8L�������������H���H�t$HH�������M9������A�EH�L$HH�ƒ�H�������s(�l���@A�EH�ƒ�H��������I���I��M9�u��3���E1�H�K8L������������A�}�
������$��������M9������E1�A�}
A��M�����I9��|���H�p�H�t$8;T$@����;T$l����H�D$8�t$@�|$l��H�P9���9����H��I9�u�������H�h"H��$�H�D$P���M9��e�����$��tBL9���I�EI9�v'A�U����t�q������ueH��I9�u�L��I������Hc�$H�H��I9�r�����$��Hc�A8Lu��������$
A8Du�����f.�H��$�I���S���H�K8A�L���^�����M����u���A�V������D$DH�D$H�����$
A8D�����Z���H�H��$�
f��$�;���DŽ$�+���D��H��T$�/�������D$(�T$������D9�����A���������<��������������DH�\$P�D$DH�D$H�,+�=���DŽ$�
���H���D$D�����H�\$H�
��������������������������x���������n���������d�������Z������P���f.�@L���Lc�1�I)�M9���H�AHc�Lc�L��A����S�Ad����I�DL9����>L�F������RL��A�M�Z��=���M��9�t��PH�
��!A��I�A��A����Hc����B�����D)��H�
��!Hc��QH�!�!D�9��m���1�[ÐC�I��B:D��,��D)�����D��1��!@D�>L�AHE�:H��C�C8u�A��A)�E��ظ[�f�H�
��!��?L��!��[A#�H��ɍ[��������M�TM��f.�I��A�P�����?��	�M9�u�M�T����fDH�9�!��?D�H�z�!C�[B#<�L���E�[����}���J�tL���H���B�����?��	�H9�u�K�t�P����1��f.�AWI��M��AVI��AUATUSH��xH��$�H�T$�L$0L�L$ H�D$dH�%(H��$h1��D$(�D$4A�Gd�D$I�H�PI�I;G�
I��$�I;G�I��$��}H�D$ E�*M��M��H�$A��Hc�A��r�!H�=V�A��Hc�H�>��I�SM��Lc�M��N����$�L�%�!B���D$B�D�D$ A�E )�H�H��L��I+����b�8@��@��|$(A����A��|$4�~�|$8L�\$HH��M��L��L�T$HA�4�D$@L�PU�t$(L�L$ �L$PH�T$(���H�� L�T$@L�\$H��t=���uO�D$4A���A�Gf����I�A�<Tt�I�U�D$B���D$ B�DA�E )؋\$(H����1�H��$hdH3%(�_�H��x[]A\A]A^A_�DH�D$L��$�H��$�H��$�H�D$�\���f�A�Gf����I�E�/A��Tt��H���D�D$I�K,L��L��L�\$8�������a���L�\$8@M���I�FM9���)�|$����ME�oI��I�������D$XA�Gf�����D$P1�A��&M�o��E1�D$hA�]�l$�؅�t����?M�}�$��-E��~YI���I��I9��!rA�I�V9������I�ֺD)�� fDM9���@I��A�F�9��v���B�2A9�}�D;d$P���t$h���X��t$PA9���yM���M9���A�9���y���D)�I�lL���L9��i�9���hH��H9�u�L$X����<M��L9������$�I��H�l$�XH��L��M��L��SI��jUL�L$ �L$PH�T$(����H�� �������M9�s�����E�H���D�$H�����M�o����\$h�D$X���V�D$P����\���M;���7@E��D$I�vD�…�t
A����;[I�SPD�������E�oI��I�������M;���SXA��L$I�~�Ѕ�t�����]I�CP������DE�oI��I���u����M;����WE��t$I�~D�…�t
A����ZI�SPD����j���I���E�oI������f�D�D$I���E����KI9��!1�M;���VKA�6����pI�CP���0�������I�w1�A�?u9��������E�oI������E�oL�t$I�����DM;���WE��D$I�~D�…�t
A����[^I�SPD��������u���DIc��I��I9�������T���@I���I9�����;���I���H����w�K����wH�HcS L�\$8H�sI�{I���H���	���C$L�{L�\$8�D$0H�D$ E�/H�$����I�W@�Bf����H€:Tt�L�zD�j�m����M;����UA�D�L$I�~��E��t=���Y=��i����{:��
��������R���f�M;����UE�D�l$I�vD��E��t
A�����\I�SPD������������A�GL��f����H)��E<`��HH�t$H�H�NH�\$<_��J<b��JA��Ut	I9��0�H�D$ E�oI��H�$�i���A�s(M������?IM9�����M��I�FA���H9D$�k������c���A�Ex���R������J����M;���'<A��t$M�F�Ѕ�t�����W��H�5�!��H�=Z�!������H���!A��GH���T&A�GH���Hc�H�>��fDI�FM;���w���A���M�݅������H9D$�/������D��$�L�d$L��D�l$0�XA��L��H��H�uI��D��AWL��L��jjL�L$ �g���H�� ���:���������KL=����@L��t=��������Ef����Hŀ}Tt�I��I���$�����@��$�L�d$L��D�l$0�XA��L��H��H�uI��D��AWL��L��jjL�L$ ���H�� ����L��t=���������Ef����H��E<Tt�I��I��<U�_�����$��q��I����M;����RA��\$I�~�…�t=���W=�����U6�� ������&6��	�-�������H��I�oM��L����$�H��XSj�t$(L�L$ �L$PH�T$(L�\$X���H�� �������L�\$8D�Ef����Hŀ}Tt�L�}D�m�4���fDA�GE�g<S�vL�P�����C�P�����Z��h�SDH��M��I�wL����$��XSjjL�L$ �L$PH�T$(L�\$X� �H�� L�\$8����x��t=������D��f����L�H�p�8TI���yD�h�~��M;����8A�D�T$I�~��E��t=��U=������#5�� ������5=������=�������I���f�M;���'8A�>L�$M�F�T$��A����t�����TI�GM���sYA�W9����E�oM��I����M���D�d$L��L)�E����DE�G��L��A�����cH9��~M�NA�A���CXI�SHI�O�8�v�E�oM��I���,�fDM���M9��D7E�D�d$I�vD��E��t
A����SA��
�@X��3A��
������A�k|������DM;����6E�I�~D�t$D��E��t
A����`UI�SPD����N�����M���D�l$L��L)�E����TH����NI�FA�A8_�z�E�oI��I���/�f��$��CM���M9���VA�s(���5gIcS,I)�H��M9��'�A�[@A8��������A�CAA8F�����@H��!H��M��L���pm��$���L�P��$�P�t$(H�T$(L�L$ �L$P���H�� ������D����D$8���I�G�D$H�D$P��S�\$X�\$@���=$1ۃ|$@L�x��E1҉\$(�X�@�D$4@D9T$8�]
�D$P���Dh�����(���omH�
,���Hc�H�>��@A�G�D$Pf��D��I�GD�T$8�0L�x�V�@�t$X�t$@����#�D$HE�����|$@�jM�D$X�T$������4<�H�����Hc�H�>��f�A�_�D$@M�W�D$Hf���ۉ\$8@M�zA�*�$��M�CHA�(�ʼnD$P�D$���"��~[I���I��I9��bA�I�FA:,��I��)��"fDM9���/I��A�F�A:,���B�29�}�;\$8�)B�D$H���JuD�l$E���B�t$89��EmM���M9��xxA�A:,�'m���)�I�\L���DL9���]�A:,�#aH��H9�u�l$@���]JM��I9��1��$��hH��M��L��H��Uj�t$(L�L$ �L$PH�T$(� �H�� �����H��L9�s����fDA�GM�W�D$@f�����D$81�A��3��1ۉD$H�X���M���M9��@���M���M9��(�A�s(���VEM9��s�D�D$I�K,L��L��L�\$8������R�L�\$8M���IcC,I)�M9�����1��M���M9���JA��L$M�F�Ѕ�t����
Q��H�5�!��H�=G�!��L�-�!����H���!�G�D�A�|����M9���.D�T$�5�����������G�D�A�|���.Hc�I�M9���.A����E��t�=�v�H�E�!��?H�-��!D�$C�dB#D�L�����E��t!M�`E1�f�A�,$��I����?��	�D9�u���`���E�c\I���E����>I9�����$���M;�����A�s(���pXI9����D�D$I�K,L��L�\$8��������L�\$8�?��A�GE�oI��H�$�G��M;���g0A�D�D$I�~��E��t=���P=�����X,��
���w��2�f��E�H�<��H�+���M�W����t$H�D$@����E�D$8�������H���!H�H����I���L��DŽ$�H��$�A�GL�\$8��$�I�CH��$�I���H)�H)�H��$���$�H�T$��$��\$0H)‰�$�A�Gf������$�A�Gf������$��������$�A�����$�I���H��$��х���L�\$8��E�oI�����H���!H��M��L���pk��$���L�P��$�P�t$(H�T$(L�L$ �L$P���H�� ������D���I�oH���Bf����H€:Tt�H��H�rM��L����$��XSj�t$(L�L$ �L$PH�T$(L�\$X�f�H�� ���<�E�oL�\$8I������A�Gf�����A���A;S$��9A�CXE�oI����f.�H���!H��M��L���pn��$���L�P��$�P�t$(H�T$(L�L$ �L$P��H�� ������D���H���!M��L��M��H���pl��$���L�P��$�P�t$(L�L$ �L$PH�T$(�c�H�� ���9�M��������(�fDM�oE1��D$P����D$X�D$h��@E�GM�o�D$X�D$hfA��E��D�d$P���@M�oE1��D$P�D$X�D$h��@M�oA��D$P����D$X�D$h��f��D$X�K�A�GM�W1��D$@�D$Hf�����D$8����DM�W1��D$8�D$@�D$H���DM�W��D$8����D$@�D$H���f�M�W1��D$8����D$@�D$H�e���DA�G�D$Hf�����D$81�A��@���D$PI�G����f.��U�H�
��D�H�
z���I�G�4�������\$P��E։T$8�\���fDM��M��A�Zf���ۍA;C$�
�A�Bf����A���`�'<T��j�D$��$�L�%��!D�l$�XHc�L��H��A�4M��L��SAUH�t$(L�L$ �L$PH�T$(���H�� ��t=�������Ef����H��U�Ef���Ҁ|T��)����M��M���<���DA�G!M�o<F��?<K�s?<M��?��A�_"���D$8A�G$f����f��f���и���E�I��&A�‹D$���>���}I���I��I9���dA�I�NH�ƒ�H����A�T�����I�ιD)��1M9���I��A�F�H�ƒ�H����A�T���z�B�19�~�D9����D$8M��M��E�Ӆ���T��$�D�T$���D$ E����D)�D�\$H��M��L���D$(L��Pj�t$(L�L$ �L$PH�T$(�%�H�� �����D�\$B�3A9����M;����(I��A�F�H�ƒ�H����A���r���DA�G�D$H�D$Pf�����D$8I�G���A�_f�����9\$0��,I�SHc�H��������,�l
)�E�oA��F�?A��K�%=A��M��>��E�o���D$HA�GfA��E��f��f���и���E�I���D$@����?fDE�/�����$�D�d$0L�|$8M�ݍX��H�\$H��I�wM��D��UL��jSL�L$ H�T$(��H�� ����9��t=����t�A�Gf����I�A�<Tt���L��L�|$8M��<]t
<T�B�f��Ef����H��E<Tt�A���M���D�m�t$0M9�t<U��sL�}M����f.�A�G�T$f���ȅ�u/��I)�M;�����M9������M������I�փ����t�I�V�I;�����A�F���<�u��H�����<�t��A�GI�of����I�A�<F�Y4<K��<<M�I4��A�_A�������D$@A�Gf����f��f����DE�I��D�l$8����;M;���T>D�D$A�L�|$HM��E��E��A��E��M���5�M��H���-��������A��A9��;M9�$���<A�?M�G�����v�E��t�H�
d�!��?H�5��!D�C�IB#<�L���E�I���t�K�tL��fDH���B�����?��	�H9�u�O�|�b����A�GI���M��f����H�1�H9�t�Cf������$�I���IcU H��$�H��$�H�D$ I���I�GH��$�H�Љ�$�H�����S2L��$�L��$�L��I�u1��e����D$0L�|$(I�ߋ�$�L�%{�!��$��L��M��L�d$<`@�Ń��t$�H��A�tI��L���D$ L�PUATL�L$ �L$PH�T$(�9��H�� ����6H��$���t=�����9H�D$ Hc�$�H�{H���H����A�Gf����I�A�<T�r���H��$�L�|$(H��$�H���L9����H���!�1���@I�G�D$8�D$H�D$P����A�GI�W�D$X�D$@������II���D$HA��D$P�D$8����X�D��������D$8D9��I[M���L��M9��(RL�t$8H�=2�!A��L�|$@H�5��!L�\$PH���!D�|$(D�\$4�ND��A�������D��G�D�D9�����D9��a"Mc�A��L�E9��N"I9���QD�EA�D��A���v�H�
/�!��?�H�q�!�[D#�I���A���t%H�]E1�����H����?��A	�D9�u�A���H����D$8D9��;ZM���L��M9��R2L�t$8H�=$�!L�|$@D�t$4A��H�5��!H���!L�\$PL�-��!�QD��A�������D��G�D�E9t�����;D$(�N!Hc�A��H�E9��;!L9���1D�E�D��A���v�H�
�!��?D�H�^�!C�[F#�L���A��E��t)L�]E1�f.�A���I����?��A	�D9�u���@����D$8D9��#YM���L��M9��:1L�|$@H�=�!A��L�\$PH�5��!H�~�!D�\$(L�t$8�Wf�D��A�������D��G�L��A�<������	���D9��7 Mc�A��L�E9��$ I9���0D�EA��D��A���v�H�
�!��?D�$H�I�!C�dF#�M���A��E��t%L�eE1�DA�$��I����?��A	�D9�u�A���>����t$8D9��XM���L��M9��(0�L$(��H�A��H�D9�t2I9��0�U����vH�\�!��?�����t�fDD�T$HE�����$�I��H�l$M�ݍXH��M��L��L��SjUL�L$ �L$PH�T$(�9��H�� �����I�T$�M9�����D�L$E����I��믋D$8D9��/WM���L��M9��F/L�t$8H�=�!A��L�|$@H�5��!L�\$PH���!D�|$(D�\$4�Sf�D��A�������D��G��D9�����D9��BMc�A��L�E9��/I9���.D�EA�D��A���v�H�
�!��?�H�R�!�[D#�I���A���tH�]E1����H����?��A	�D9�u�A���P���A�Gf����I�A�?Tt�A���I���D$0E�/�y��A�V�I�F�����uf�H�������t�I9��vI����0����TI�CP����0�+���$���AA8G�ƒ���8��=��E�oM��I�������Q�����B�<������	�8�u��	��f���u����fD��L�-�!�QA�GA9D��M��Ѓ���A�������M�ƃ�D�\$H��M��L���D$(L��Pj�t$(L�L$ �L$PH�T$(���H�� ���{��D�\$D9��k��M;���UA�M�F�Ё���w���H�
)�!��?H�5o�!�<�#�D�O������gK�|L��DH���F�����?��	�H9�u�O�t���������N�'�������M���M9���'��8�� t��,=�t=�����fD��D9��tM9��P'A�M�F��=���IM��=t�v�=/ t��
,=_ t�=0t��J��M���M9���&��!��
���'����D9���M9���&A�I�^I�މ�=��I=�t�v�-( ��v�����I���I��I9���UA�������I�KPI��������D)��#M9���A�������I�������B�2A9�}��YM���M9��.&�A���x#I�KP�t�M��DA���<��V<I��M9�r�A9���%���������I���I��I9���TA�������I�KPI��������D)��A�������I�������B�2A9�|�M9�u�M��A���H9t$����fD�������M���M9�����-M�ƃ�wI�CP���\����D9��0���M9��XA�M�F�Ё��v�H�
�!��?H�5\�!�<�#��o����t�I�|.L���H���F�����?��	�H9�u�M�t(�n���M���M9��<i�I�FL9�s1A�V����t�X��������@H��L9�u�I��A9��d���M�����M���M9��}L��M�c,A�s(���LCIcS,L��H)�H��H9�wA�{@@8;�QH�CM��L9��xY�S����t�>�������>H��L9�u�A9�����A���M�݅�����L9t$����A�Ex�������j��I���I��I9��ZRA�������I�KPI��������D)��A����u��I����g��B�2A9��:���M9�u����M���M9��Q����A���x"I�KP�t�$��@A���<��A9I��M9�r�A9���������M���M9��ug�A�I�v��=��ZO��
�xP�Q@��
�0@��D9��{PL9��@YI���M���Ic�I)�M9�����Iƻ�����3�M���M9��|�������	�[����D9��/���M9��W���A�M�F��=��EM��=�#����&�� ���v�=����=�u����M���M9��������!��
��������D9������M9�����A�M�F��=��EM��=�����v�-( ��w����������fDD�L$D��E������~QI���I��I9��GCI�FA:.�B���I��)��fDM9���I��A:n����B�29�}�;\$8�n#D�D$HE���XU�|$���-$�D$89���NM���M9��s[A:.�iN��)�I�\L���f�L9��g?@:(�wBH��H9�u�L$@����+M��I9��}����$��hH��M��L��H��Uj�t$(L�L$ �L$PH�T$(�l��H�� ���B��H��L9�s��2��f�I�SH���E��~UI���I��I9��$DA�I�N:����I�ιD)��M9���I��A�F�:����B�1A9�}�D;d$P���D�D$hE����Y�t$PA9��(LM���M9��XVA�:�L���D)�I�lL����L9���:�:
��:H��H9�u�|$X���M��I9��:����$�I��H�l$�XH��L��M��L��SI��jUL�L$ �L$PH�T$(� ��H�� ������M9�s�����f.�������M���M9���L��M��M��A��L��M���'�L��A9�������9���I9��?�0L�h��v�H�
X�!��?L���!D�C�IC#4�L���E�I����t�N�DL��f�H���B�����?��	�L9�u�K�D
�z�������D��M���M9��`*L��L�|$XM��A��L��D�\$PM��M���,D��A�A9�����L���9��I9��N�L�h�с��v�H�5��!��?L�
��!D�$C�dC#�E�L$����E����!N�dL���H���F�����?��	�L9�u�K�D
����s���I���X���L�xE1һ���������|$@L�x�D$H���ډ\$(�X�@�D$4E���������^���H�
G�Hc�H�>�������M�����t$�I�΃�A9��7M9��eI�NA���t܀��v�H�=p�!��?���tŃ�Hc�L�t�M�����l$D�d$4D�l$(�S@I�މ�H�5L�!��H�
��!������H��!�Q��D9�����D9��
����A9��q:M9���A�I�^�Ѕ�t����v�H�
��!��?H�5	�!D�C�@B#�E�p����E���c���E1�H��DH���F�����?��	�D9�u�N�t3�:���M�����l$L��D�d$4D�l$(�Y�H�؉�L�\�!��H�5��!��A�����VH��!�T�D9�����D9������A9��B:I9����H�X�х�t����v�H�5��!��?L��!D�4C�vC#�L���E�v����`���N�D0H���H���F�����?��	�L9�u�J�D3�2���M�����l$L��D�d$4D�l$(�]H��A��H�k�!��H�5��!A��H�
~�!F�A��D��4VH��!�T�D9$�����D9������A9��S9I9���;�H�X�х�t����v�H�5��!��?L��!D�4C�vC#�L���E�v����T���N�D0H��@H���F�����?��	�L9�u�J�D3�*���M�����l$D�d$(�ZI�މ�H�5|�!��H�
��!������H�7�!�Q�T��B�<������	���D9��2����A9���7M9��.���A�I�^�Ѕ�t����v�H�
��!��?H�5.�!D�C�@B#�E�h����E���X���E1�H��f�H���F�����?��	�D9�u�N�t+�2�����|$@��/�D$X�|$�����><�����H�����Hc�H�>��D9T$8�@SM���M9���U1�|$@L��@���2��
������9���Hc�A��H�D9T$8��L9������=��<=��j=v�-( �������D9T$8��RM���M9��gU1�|$@L��@���K�� ��:���=�t
1�=���9��Hc�A��H�D9T$8�L9��n����=��;=�q:v�=/ �d:�{�=_ t�1�=0���D9T$8��QM���M9���TL���A��Ё����1��
�'-��
t+A�s|����J�B���v���t��( ����JMc�L�A��D9T$8��JL9�r�H���I����D$8H��D)�L)�9���AI��L$H���/H�݋�$�M��I��H�l$�XH��M��L��L��SjUL�L$ �L$PH�T$(�E��H�� �����I�T$�M9��
��A�D$���<�ufDH�����<�t�I���D9T$8��PM���L��M9�r9�>AfDI�CP���C���Hc�A��H�D9T$8�.���L9��
A���Ё��v�H�
a�!��?�4H���!�v#�H�����t H�sE1����H����?��	�D9�u������{����f���fDD9T$8��OM���L��M9�r4��I�CP�������Hc�A��H�D9T$8�n���L9������Ё��v�H�
��!��?�4H��!�v#�H�����t H�sE1����H����?��	�D9�u������k������D9T$8�9OM���L��M9�r:�?�I�CP������Hc�A��H�D9T$8�����L9���?���Ё��v�H�
�!��?�4H�#�!�v#�H�����t H�sE1����H����?��	�D9�u������{����f���fD�D$8=����KD9��fNM���L��M�c,M9��QA�s(���%8IcS,L��H)�H��H9�wA�[@8]�CH�EL��L9���K�U����t�'5@������5H��L9�u�A�B9D$8�����A���H9\$�{������s���A�Cx���b����x��f.�D9T$8��MM���L��M9�r4�@I�CP���#���Hc�A��H�D9T$8����L9��x������Ё��v�H�
A�!��?�4H���!�v#�H�����t H�sE1����H����?��	�D9�u������k������D9T$8��LM���L��M9�r:����I�CP���c���Hc�A��H�D9T$8�N���L9���������Ё��v�H�
��!��?�4H�þ!�v#�H�����t H�sE1����H����?��	�D9�u������{����f���fDD9T$8�LM���L��M9�r4�'����I�CP�������Hc�A��H�D9T$8�����L9���������Ё��v�H�
��!��?�4H��!�v#�H�����t H�sE1����H����?��	�D9�u������k��������D$8=�����HI������A�D$���<��f��H�����<�t��L�DE�/I����@M��A���������H9L$�9�����fD<T�i*�D$����-( ����������=�����=��������D=/ ������- ��
�������@A����m���u���A��������W���`�����	�����R���f�=/ ����vC=_ �x��=0�m���(����=_ ����=0�J������D- ��
�2�����D-( ������տ��DM���h���H���!��?�4H��!�v#��FH���Hc���H�t$8M�|5����I�U1����H����?��	�9�u��D$p�$tQH�5^�!�؉�H�=��!��������H��!�G�|��9�tH��$`L�\$@���L�\$@�D$pE���lHc\$p�M���L�l$`L��$�A��L��L��H+D$8��H)�I��L�\$xH�|$@H��$`H�|$HL��$��.fDH�T$8H�t$`L�������uLt$8A��E9���M9�sӅ��L9t$@��H�t$HH��L����������I��I���I��I9��N1I�KPA�I�F��"����I��)��I��A�F������B�2A9����M9�uېM��A������޽��H9t$���ν��fDM��A���H9L$��鮽��fDD�\$I��E���Q��A�I�H�$H�D$ DŽ$����L��$�L9t$L�l$x��M9���A������F������>���A�Ex���-��������L�t$8L�|$@L�\$P��M��A���L9t$�_���<�b�H�����Hc�H�>��M���Ic�I)�M9��8����I���I��I9���-L����PD)�A9���L9��`���H���P���
�w�����
vҀ��t��h���M���M9��`OM��I�[,�AIcS,I)�M9�wA�s@A8u�08I��D��D)�A9���DM���M9��TDA�s(��t�E1�H��L��L��D�T$hL�\$`�K��L�\$`D�T$h��t��ػ���I���I��I9���.I�KPA�I�F�������I��)��I��A�F�������B�2A9��[�M9�u����I���I��I9��9I�KPA�I�F��H����I��)��I��A�F���)���B�2A9����M9�u��_�I���I��I9���8I�KPA�I�F�����I��)��I��A�F���ʺ��B�2A9����M9�u���I���I��I9���-I�KPA�I�F�������I��)��I��A�F���k���B�2A9��>�M9�u��d���I���I��I9���7I�KPA�I�F��+����I��)��I��A�F������B�2A9����M9�u��B�I���I��I9���*L����PD)�A9���L9������H���P���
rۀ�
��������u�饹��I���I��I9���*L����PD)�A9�|uL9��Q���H���P��� t߀��tڀ�	�^�����I���I��I9��A*L���	L9�����H���P��� �*�������!�����	�����PD)�A9�}�I�ƻ�������M���M9���KA��I�V<w0<�1&<
�ϸ����D9�<L9���;I��A�I�V<v�<
��%L9���;A�~
�G��A9�}�I�փ��8��A�Ch1��*��M���M)�A�i���������W�������M���<���H�8���Hc�H�>��D9T$8��:I���I��I9��i3A��P���v=���:�D$8��D)�I�tL��H��H9�tL9��13��J���v��t�H����t$8D9��A:I���I��I9���2A��P����:=��:���D)�I�tL���L9���2��J���v����t�H��H9�u��D9T$8��9I���I��I9���2A��ƒ��� t<	��9�D$8��D)�I�tL��H��H9��1���L9��F2��у��� t݀�	t������|$8D9��Z9I���I��I9���DA��ƒ��� �59<	�-9����D)�I�tL���L9�t~��у��� �������	�����H��H9�u����D9T$8��8M���L��M9�s<���
����
tA�S|��ud�P���v=�uUH��A��D9T$8tFL9�r�A���H9\$s3��t/A�Cx��~"鏼���I����D$8H��D)�L)�9�w�I��D$H������$�I��M�ݍXM9������H��L��M��L��SI��j�t$(L�L$ �L$PH�T$(蔳��H�� ��t��i���D9T$8��7M���M9���C�D$8L��M�c,��D)�M�l�7IcS,I)�L9�wA�s@@83�31H��L9��I���M���I9������A�s(��t�E1�L��L��H��L�\$8�?��L�\$8��t��
�����t$8D9��V7I���I��I9���BI�KPA���17���D)�I�tL���L9��~����������H��H9�u�����|$8D9���6I���I��I9���BI�KPA����6����D)�I�tL���L9��������K���H��H9�u��=����t$8D9���6I���I��I9��'BI�KPA���a6���D)�I�tL���L9�����������H��H9�u������|$8D9��6I���I��I9���.I�KPA����5����D)�I�tL���L9���.���{���H��H9�u��m����t$8D9���5I���I��I9��WAI�KPA����5���D)�I�tL���L9�����������H��H9�u������\$8D9��N5M���M9���@I�SPA���,5��L���D)�I�L�
�������H��H9������L9�u��^���I���H��trA�Wf����f�����
�Y�\$89���
��guHA�[0M�K8��~<A�f����9���CMck4M��1��A�$f����9���.��M�9�u�A�Gf����L����I��D��M��M��I�����M��M��I��I��D��L�|$X����L9��`���A�V����t�N���fD������9���H��I9�u��+���M��I��A������q���I���M��I���5�I��M��A���L9t$�����!����P�����5<]��51��-���A�[ I�KHc�)�HcË\$0����L��I+��9ӉD��BOÉD$0����A���L9t$s��u1�鴴��A�Cx���Q���1�霴��1�I9�t'M;��wI�F�I���A�F�I�SP�����M;��s�A�I�KP������N���IcS,L��H)�H��I9����A�[@A8�ٰ�����!���A�CAA8F������
���DH���_I�CHA�I�VA�w�80���E�oI��I��閮��A�[`���į��E�SlM���E������M9��G���A�s(���#M9������D�D$I�K,L��L��L�\$8�������q���L�\$8M���IcC,I)�M9�������P���H�A�!H��H��H��$�H���DAHc�$�L��$�H������$�ij��I9������������D$@��D$8�����Ef�����A���A;S$��A�CXI���H��t	;C��"E�/�д��E�j�U���D�l$8D9��a7M���L��M9���L�T$8D�T$P�-f���A�A9���Hc���H�A9���I9����U��Ё��v�H�
�!��?�4H�E�!�v#�H�����t"H�uE1�D���H����?��	�D9�u������{����f���fDD�l$8D9���M���L��M9�w'�9fDA9�tyHc���H�A9�tkL9���U��Ё��v�H�
J�!��?�4H���!�v#�H�����t!H�uE1�@���H����?��	�D9�u���L���t$@���;��$�M�ݍXH��M��L��H��Sj�t$(L�L$ �L$PH�T$(���H�� ���Ь��H�U�L9�������E���<�ufDH�����<�t�H���L���Q��L�t$8L�|$@L�\$PA���H9l$��������A�Cx����������fD- ��
�����=���D��	�����*���f.�=/ ����w- ��
��������=_ �����=0�������DI��I��f.�A�Gf����I�A�?Tt�鿳��1҃�	����L�l$`L��$�L�\$xD;d$P�����\$h���48L��D;d$P��M���Hc\$pL�l$`E��L�t$hL��L��H+D$8L�|$pH)�D�|$PA��I��H�|$@H��$`H�|$HL�\$PL�L$x�-DH�T$8H�t$`H��膾����uHl$8A��E9��fI9�s�E���H9l$@��H�t$HH��H���G�������H��- 1҃�����H���I��I��H�D$�E�&���I�����L��M��L�|$8H�D$�E���H��$�L�|$(�$H��$�H���L9���H���!��$�4���E�/I�����IcC,L��Hc�H)�I9�����A�[@A8�����������A�[AA8^���飺��DH�ɥ!H�H����I���L��t$0L�\$8DŽ$�A�GH��$���$�I�CH��$�I���H)�H)�H��$���$�H�L$��$�H)���$�A�Gf������$�A�Gf������$��������$�A�����$�I���H��$��҅�����L�\$8�����H���!�@SI�A�G釲�����c��M���M9��%�L���1�Ѓ���A�D�������L���9��&I9���L�`�с��v�H�5Z�!��?L���!D�4C�vC#�L���E�v����LN�D0L��H���F�����?��	�L9�u�K�D4����g:��N�q����
���A��H��I����F��D�,H������t$H�4������EƉD$@������FH�ؗI��"�Ã��\$8�H���D�����E��DD�����D$I��!��D$8A���������}���L�|$(L9��`����$H�������D$@�D$@�D$8����D$X�D$X�D$P�5���M��L�|$HM��;\$8�4���|$@M�݅�����$����D$ H��M��L��L���D$(Pj�t$(L�L$ �L$PH�T$(���H�� ���¦��;\$8�����M;���L���A�>�t$M�f����t����[H�������{�����M���y�����FH�|�I���Ã��\$@�H�^���D$8���5�����/�D$8����(��L�$L�ى�L����L�\$8����L�\$8����A���M�݅���M;��������E�jI��除��E�jI��錤��������M��M��M���H�L�T$8D�d$@E��uË�$�M�ݍXH��M��L��H��Sj�t$(L�L$ �L$PH�T$(肣��H�� ���X���H�U�L9��I����E���<�ufDH�����<�t�H���E����A�Hc�L�|$PM��H�D$8E��M���fDA��L|$8E9���L�$L���L�����à����u�A��$�M��M�������M9�$�����馤��fDM���;�M��M��I�rI��c���W��$�M����Pj�t$(L�L$ M��L���L$PH�T$(�t���H�� �M���M���M9��I���D$�D$`H�=}�!H�5&�!L�|$p��H��!L�-9�!A���D$h�E�I�nD��A���v�|$h�!I��D��A�������A�B�G�D�A�|������M9�r@�f.������������G�D�A�|���Mc�M�M9�soA�A���E��t�=�v�L�8�!��?H�~�!A�,�Lm#�I������t&I�nE1���]��H����?��	�D9�u�A���`���L�|$pD;T$`��������D$`�D$`D9���4M9����������H�
��!��?H��!D�$C�dF#�L���E�d$�A�������O�t&H��DH���C�����?��A	�L9�u�N�t%���H�?�!��?�4H���!�vD#�D�N�D�����A������$K�tH��@H���B�����?��A	�H9�u�N�tA��������L���s���fDH�ɝ!��?�<H��!�D#�D�O�D�����A����t'K�|H��f�H���B�����?��A	�H9�u�J�tA����c���遡���H�
a�!��?H�5��!�<�#�D�O�����@���K�|L��DH���F�����?��	�H9�u�O�D����H��!��?�<H�J�!�D#��_�D�����A�����Ȭ��I�|H��fDH���B�����?��A	�H9�u�H�t阬��H�
��!��?H�5�!D�C�@B#�E�H����E���#���E1�H���H���V�����?��	�D9�u�J�|���H�
P�!��?H�5��!D�C�@B#�E�H����E�������O�DH���H���V�����?��	�L9�u�J�|鋪��H��!��?�4H�2�!�v#<��^������I�tL��DH���B�����?��	�H9�u�M�tI�GM����%����E�oI���\���H�
w�!��?H�5��!D�C�@B#�E�H����E���Q���O�DH���H���V�����?��	�L9�u�J�|�!���H�
�!��?H�5^�!D�C�@B#�E�H����E��t(E1�H��fDH���F�����?��	�D9�u�J�|��������Ӟ��A�WI�O�����H�=��!H�փ�?�47��Hc�H9��(�F�M�|�
@�H��I��H�AA8V��x���L9�u�D�i�/���H�J�!��?�4H���!�vD#�D�N�D�����A�����1�H��f�H���B�����?��A	�9�u�N�tA��������L���6����H�
ٙ!��?H�5�!�<�#��_�����̮��I�|L��fDH���F�����?��	�H9�u�M�D靮��H���!��?�<H�™!�D#�D�O�D�����A����t.K�|H��f�H���B�����?��A	�H9�u�J�tA��������1����H��!��?�4H�S�!�vD#�D�N�D�����A������1�H��f�H���B�����?��A	�9�u�N�tA����g���L���4����H�
��!��?H�5ߘ!D�C�@B#�E�H����E���5���E1�H��H���V�����?��	�D9�u�J�|����A�k`�����N���fD�؁���Hc�M�|?A9�t<D��H�5*�!D��H�=p�!��������H��!�GDD�D9���E�/M��骚��I9��ɞ��1��>
��H�鹞��A�s I�{Lc‹\$0)�Hc��4�B�4�L��I+��9�B�t��rO�t$0���M��I�SHA�O���4
@84:������o����A�wf�����9t$0~I�KHc�D��E���������e�U�E�c0�I�K8E���B��f����9��-Ic[4H��E1���f����9���A��H�E9�u�����A9��s�����f���A9���������H�_L9��>���
��A��D9T$8�%��H�����L�t$hL�|$pL�\$PD�L$XE���5����$�M�ݍXH��M��L��H��Sj�t$(L�L$ �L$PH�T$(�Y���H�� ���/���L9��$����E�H�U���<�uDH�����<�t�H���D9T$8��'M���M9��*�D$L�t$PL��H�='�!H�5Е!L�|$X��H���!L�-�!A���D$@D�EL�eD��A���v�|$@��L��D��A�������A�B�G�D�A�|���
I9�w7�������������G�D�A�|���Mc�L�I9�vw�EA���E��t�=�v�L��!��?H�.�!E�$C�dB#�M�����E��t%L�eE1�fDA�$��I����?��	�D9�u�A���_����L�t$PL�|$XA�B;D$8�ZD�D$HE���J����$�M��D�`H��M��L��H��ATj�t$(L�L$ �L$PH�T$(�l���H�� ���B���H�E�L9��3���H��L���!L�
*�!H�=��!L�D�!�t$�5fDD��A����A���B�A�C�D�A�<��i���H��D�UD�҅�t��H���U�Ѓ�<�t�D��A���v�H���!��?�H�͓!�[D#��A����y���H�U1����H����?��A	�9�u��U���DA��D9T$8��I9������L�t$PL�|$XA���H9l$��������������A�Cx�{���鴝��fDH�
�!��?H�'�!D�4C�vF#�L���E�v�A���F���J�l5L��fDH���C�����?��A	�H9�u�K�l4�����D$(1��G���L�%��!����H�CL9������{
��
A��D9T$8����H������H��D9�����4����|$@A�_�D$PA����D$8���I�����D$H�D$(A�G��D$4�
��L�����H�
Ց!��?�4H��!�v#�I�����t#H�w1�����H����?��	�9�u�A�����H���H��H��A���H9l$�3�����+��A�Cx����������IcK,H�H��I9��M�����A�[@��Hc�A8�7������Θ��A�CAA8D����鹘��f�H���
���H��A���H9l$���������A�Cx���ߖ��郛��D��D9�����L9��7���I�����L�T$8A��������H9l$���A�Cx������,���fDI��M����I����A���H9l$�9����1�A�Cx��� ����DH��!��?�4H�S�!�v#<�D�F�����z�1�L��H���B�����?��	�9�u�O�d�U�L�L$xL�\$PL�t$hL�|$pI9�A���@��H9l$��@���2������*�����A�Cx�����=���������A�C|��������q����=�t��c���-( ��v��T���M��L�|$PM��D;l$@�^����D$HD�l$M�݅���
��$�D�`Hc�H�D$ H��M��L��L��ATj�t$(L�L$ �L$PH�T$(����H�� ������|$9|$@�Ւ��L�$L���L����貎������Lt$ �D$�<������A�C|������镒��D��>�����4���H��A���H9\$����������A�Cx���������f�H��A����������H9\$�����A�Cx���}���默��DD�D$L��L��H��D�T$hL�\$`�K��������L�\$`D�T$hM���陼��I�ƻ�x���I�ƻ�k���D�L$E����D9��HI���L9���$A�H�ƒ�H����A����A�B�)�I�tL���#H9����H�у�H����A���s	H��H9�u�H��I9��%�����$��XH��M��L��H��Sj�t$(L�L$ �L$PH�T$(����H�� �����H��L9�s��ڐ��L��H���!��?�H���!�RD#�H���A���t%I�W1�f����H����?��A	�9�u��Hc��ٛ��;\$8��M9�����L$M����D$(�D�A�H��L�T$ 譣��L�T$ ����Mc��M�;\$8trM9���LA�:�����v��|$(t�H��!��?�H�"�!�R#<�I������tI�R1����H����?��	�9�u�A���h���M��$��XH��M��L��L��Sj�t$(L�L$ �L$PH�T$(L�T$@虍��H�� ���o���L�T$ I�B�M9��[����T$��uI���A�R�����u�H�������t���H���ޞ��H��銽�����i�I������M��L������H��A��D9T$8�z����fDH�
��!��?H�=�!D�C�@B#�L���E�@���������1�DI��A�V�����?��	�9�u�N�t鍶��H�
h�!��?H�5��!�<�#��o�������I�|.L��DH���V�����?��	�H9�u�M�t(鼵��H�
�!��?H�5V�!�<�#��o�����Ժ��I�|.L��DH���V�����?��	�H9�u�M�t(驺����?��L�t$8L�|$@L�\$PA������O���H9l$�D����3�H�
��!��?H�5Ɖ!�<�#��o�����Ǻ��I�|.L��DH���V�����?��	�H9�u�M�t(霺��Hc�I��I����H�
�!��?�4H�[�!�v#�H�����t H�sE1����H����?��	�D9�u�����M��L������H�
��!��?�4H��!�v#�H�����tH�sE1�f����H����?��	�D9�u�����M��L������IcC,L��Hc�H)�I9��q���A�[@A8�c���������A�[AA8^�K�������fDD�D$L��L��H��L�\$@D�T$P胠��L�\$@����
M���D�T$P���D�D$I�K,L��L��L�\$8�L���L�\$8�������ڋ��f.�H�����L�t$PL�|$X�(�H�����H�HcS L�\$8I�{H�sI���H���k���鍐����F��H�
]�!��?H�=��!D�C�@B#4����E��tI�~E1����H����?��	�D9�u������1��%���D9���M���M��M9�w9����у���A�����H���I�D9�t{M9���A���с��v�H���!��?�4H��!�v#�H�����t!I�rE1�@���H����?��	�D9�u�G����k�����N�x�����$��XH��M��L��L��Sj�t$(L�L$ �L$PH�T$(L�T$8�L���H�� ���"���L�T$I�R�M9�����A�B���<�uf�H�����<�t�I���A��D�T$ �\$@M��A�����A���B���H��xJc�H�>��$��XH��M��L��L��Sj�t$(L�L$ �L$PH�T$(訇��H�� ���~����t$ 9t$8�n���M;���X��A�I�v��=��z��H�
K�!��������H���!�BH��!�D�;D$4����;D$(�	����D$ I���M�����$��XH��M��L��L��Sj�t$(L�L$ �L$PH�T$(���H�� ��������t$ 9t$8�����M;������A�I�v��=��G��H�
��!���|$4������H�Ą!�BH�A�!�T�H���!9<�����;D$(�B����D$ I���C�����$��XH��M��L��L��Sj�t$(L�L$ �L$PH�T$(�$���H�� ��������t$ 9t$8���M;������A�I�v��=�����H�
ǃ!��������H��!�BH�~�!�T��B�<������	���;D$(�|����D$ I���D�����$��XH��M��L��L��Sj�t$(L�L$ �L$PH�T$(�^���H�� ���4����t$ 9t$8�$���M;�����A�I�V<��V�|$(������D$ I��덋�$��XH��M��L��L��Sj�t$(L�L$ �L$PH�T$(���H�� ��������t$ 9t$8�����M;���=��A�I�v��=����H�
��!��������H���!�BH�;�!��;D$4����;D$(�C����D$ I���N���H��!��?�4H�Y�!�v#�H��ɍv������I�l6L��H���B�����?��	�H9�u�M�L1��H�
��!��?H�=�!D�C�@B#4����E��tH��1����H����?��	�9�u���`�������DA���������H9\$�������Hc�M��|$@H�D$ 9|$|�Uf��D$Ld$ �D$9D$@t6L�$L���L���������u�A�����tM9��wL9d$�]M9��������$�Hc�XH��M��L��L��Sj�t$(L�L$ �L$PH�T$(���H�� �������I)�M9�s�鰄��L���H���L��A���������H9\$�������L��郆��L���$���H�
W�!��?H�=��!D�C�@B#�A�h����E���w���E1�H��H���W�����?��	�D9�u�H�t.�P���������A�CA8C�ծ�������������A�CAA8E������������A�CA8C�������M��A������
�L9T$����A�Ex����F����M��L9t$�,
�D$0M���A���H�D$I�������L���±��L������L9����>
���A9������I�����ސ��M9�seI��I�D$L�l$PM��H�D$X� A�EL�L$`f����9D$8�W��M9�s.Ll$PH�|$XL�\$HI�u�T$@L�L$`�Y����T$@L�\$H��t���9�����Mck4I�D$H�D$@O�d,�)A�D$��T$Hf����9D$8������M�9�����H�|$@L��L�\$P�T$H���L�\$P��t��m��M��L��锫��I���h�M���x�I����H9�suH��H�BH�\$PH��H�D$X�/�H�L$8f�����9D$0~I�sH�D��E���O��H9�s0H\$PH�|$XL�\$HH�sH�T$@H�L$8�P���H�T$@L�\$H��t�A��E9�����IcK4H�BH�D$8H�\
�9��C�H�L$@f�����9D$0~I�SH��<�����A��H�E9��v��H�|$8H��L�\$HH�L$@�є��L�\$H��t��Q����tA�CA8E�ۼ��H��髹����$����D$(���X�|$H�-�p��H�-8pH��M��L��L���D$0Pj�t$(L�L$ �L$PH�T$(��~��H�� ��������t$ 9t$8�����I���I9��6�����~�t$XI�NA���@�����@��Hct�H�>��M9��v
A�>
uI���D$ H��M��L��L���D$0Pj�t$(L�L$ �L$PH�T$(�>~��H�� �������t$ 9t$8����I���I9���������	A�I�V��=����|$X�O����=���H�5No��Hc�H�>��D$ I��H��M��L��L���D$0Pj�t$(L�L$ �L$PH�T$(�}��H�� ���t���t$ 9t$8�d��I���I9�����A�I�^D�t$��E��t=���
��L�
4{!��L�z{!��H�=�z!L�A{!A����A�C�D�A�<�u3��~����ƒ���A����A�C�D�A�<�����Hc�H�H9��	�����t$PD�d$�‰�E��t�=�v�H�
z!��?�4H��z!�v#�I�����tH�sE1����H����?��	�D9�u�A�L$�a���<
�I~��<
v<��=~���D$ I���H���<
r�<
�$~��<�u��~��< t�<�t�<	�~����< �~��<���}��<	u���}��<��<�H<
t���}��I�UP�u���}��I�UP�t��}��I�UP��n����}��I�UP��[����}��I�UP��H����}��I�UP��5����m}��L�����M��M��A�Bf����I�D�8Tu�Pf����HЀ8Tt�H�p���I���"����$��D$M��M��L�%y!�XA�ARSA�4�D$(L�P�|��H���x���A9��Q��M��A�������|��H9T$����|��M���$����A9���������$�M�݃��|$�D$ uiD)�H��M��L��L���D$(Pj�t$(L�L$ �L$PH�T$(�{z��H�� ���Q|��B�39D$8�A|��M;������I��A:n�u��%|��DH��M��L��L���D$(Pj�t$(L�L$ �L$PH�T$(�z��H�� ����{��9\$8��{��M;���ɾ��A�I�v��=��5
I��A9���{�����I��鬦��A���H9l$�m����e���A�Ex�T��"���M�������$��|$M��D�`uoD)�H��M��L��L��ATj�t$(L�L$ �L$PH�T$(�Ly��H�� ���"{��B�39D$8�{��M;������I��A�F�I�UH@:,u���z��DH��M��L��L��ATj�t$(L�L$ �L$PH�T$(��x��H�� ����z��9\$8��z��M;���@��A�I�V��=��nI�MH���9D$P�tz��I�փ��A9��2���M��I��A���L9\$������Fz��L���9�L����A��������L9T$�����A�Ex���鴀��M�������$�M��E)�hH��M��L��L��Uj�t$(L�L$ �L$PH�T$(��w��H�� ����y��C�49D$P��y��M;�������I��A�F�9�t��y��M���L��I�k,L9��}���A�s(��utIcS,L��H)�H��H9�wA�s@@83��H�CL9�sF�S����t�8������u%H��L9�u�L��A��A�����u�隱���H����D�D$H��L��H��L�\$8D�T$@�>���L�\$8���d���M���D�T$@�q������J���A�CA8C�Z����7���H��锴��D9��qI���I��I9��$L��H�BL9��q�R����t�I������uH��L9�u�A�B9D$8�ɰ���7���A��D9T$8�L9��H���L����A�u(����IcM,H)�H��I9�wA�}@A8>��
��@����w��I�N���A����������w��M��M��A�������w��L9d$�Ȣ���w��M����B����}a��$�M��M��E�A�ԍXA��A��W��L�$M��H��L��AUSAT�t$(L�L$@�L$PH�T$(�Yu��H�� ���/w��L�$I��靹��L���x���|$@M���D$8�����������$�M��E)�hH��M��L��L��Uj�t$(L�L$ �L$PH�T$(��t��H�� ����v��C�49D$P��v��M;�������I��A�F�I�UH:t��v���Ӊ��L9t$M��A�����M9��„��˟���^v��L����A�{p�Kv��A�{t���Ic��I��H9D$����"v��L��魮��A�u(���IIcM,H)�H��I9�wA�}@A8>����@����u��A�I����=������H��q!��?H�5�q!�<
k�#��w���t$M�D6L��H���W�����?��	�L9�u�M�t6�t$X�V��������HcT�H�>��I��=��
���wh��
������Hu��I��=��:u��wZ��
������'u��I��=���wZ�� ���vA=����=������t��-( �������t��-( �������t����	�t��t��=/ �d���=_ �S�=0�H��t��I��=��t������ �tt������	���`t��I��=��Rt��I�UP�������=t��I��=����I�UP�������t��I�փ�
���w2��
�����s��A�}|�����s��- ��
�����s��=�t���s��-( ��v��s��=���s��=��Z��s��=/ ��s���/- ��
�6��s��I��=��qs��I�UP������\s��L���Z���L���'���$�Hcl$pL�l$ M�ݍX�H��H�t$ L��裆����u^Lt$8A��ARM��L��L��Sj�t$(L�L$ �L$PH�T$(�q��H�� ����r��D9d$P��r��I���H�t$8H�D$H)�I9�v��|$p�yH�D$H)�I9��hH��$`H��L���������MI��h���H�
cn!��?H�=�n!D�Ak�B#�A�x���E�����M�L>I��I��A�P�����?��	�M9�u�H�t>��L�����H���b���H�5�m!��?H�=An!D�Ak�B#�A�x���E��t%M�L>I��I��A�p�����?��	�M9�u�H�T:I��=��M����1���L��鬻��H�
�m!��?��������H�H�T��M��鐘��I�V鬸��H�
\m!��?H�5�m!�<k�#��w�����H��H���W�����?��	Ѕ�u�H�\3��E1�I�M,L��聅����@���6���A��D;T$8���H��钩��H�����<
�OH9����A�~
���I�N��H�
�l!��?H�=�l!D�Ak�B#�A�x���E���Y�M�L>I��I��A�P�����?��	�M9�u�H�t>�/�PI�rM��L��Sj�t$(L�L$@�L$PH�T$(�vn��H�� ���Lp��E���_I��鶲��H�4$M��M��H�t$ A��uH���MH��H	�H�L$ <W�`PM��L��L��Sj�t$(L�L$@�L$PH�T$(�n��H�� ����o��L�UM���J���L�����D�D$I�M,L��������@�����I��=���o��I�UP����=��o��I��=��*�I�UP������co��=_ �Xo��=0����Ho��H�
)k!��?H�=ok!D�Ak�B#�A�x���E�����M�L>I��I��A�P�����?��	�M9�u�H�t>�b�H�
�j!��?H�=k!D�Ak�B#�A�x���E�����I��I��A�P�����?��	Ѕ�u�H�t>���<���n��A�}|�P��n��H�
ij!��?H�=�j!D�Ak�B#�A�x���E�����M�L>I��I��A�P�����?��	�M9�u�L�t>�w�M��1��k�L�|$p����{����t1�A�EAA8F@������H��E1��D������������m��M��AWH��L��SM��AT�t$(L�L$@�L$PH�T$(��k��H�� �m��I������L�$H�uM��L��PSj�t$(L�L$@�L$PH�T$(�k��H�� ���vm��M��L�$���=���I�UP�������Hm��M��L���d�L;t$A�����L9t$�„�������m����t'1�A�EAA8F@������H�����M��M���9��������I������I���v���I���}�A������h���L9t$�]���L�����A�Ex����As��M��L���s��M����M��I���o�M��M���
�f.���AWAVAUATUSH��D��H���H��$ �L$8H�D$(dH�%(H��$�1Ɂ�oZ�|$��H����I��H���~D��$(H����E���� ˆT$S�^��$(�����S�{E��HDŽ$����HDŽ$���L�K H���$ H��$(�CHDŽ$���$$H�D$H��tMH���t	L�VL�T$��tL�VL��$���tL�V(L��$��tL�VL��$�����M�����;ERCPI���a��A�OE�_L��$xD��$�DŽ$hH�L��$8HDŽ$�H‰�H��D���H��$pHc\$8��$\��L�%�D$��������H��$��D$d��$T�������$XD�������$LD�������$PD���
����$`D�����A����$d�E�I��@H��$@D��%���$�=���=�������D$��H��$�dH3%(�D$��H���[]A\A]A^A_�L�t$H��$�H��D�D$H��$�H��L�L$L���|��I��H���@M���P�xID��@D�D$L�L$H�l$�?���L�N M���#���L�
4e!������������EƉ�$l��D��%p��pE�= �f
��=@��	=P��	=0��H��
H��$f��$0��tA�G��
Ic�H�D$pI�l�D$��t
A�� ��A�G�VUUU�lj�$(��$(��H�|$0A���)�D�Rf����D����D����)�9���D�DH�d!L�L$@Ic�D�\$<�L$ H��D�D$�D�D$�L$ H��H��D�\$<L�L$@H��$��C������D��$DŽ$H��A�GDŽ$������D�ꉔ$9�$(}D���D$T�kC�4�VUUUH�|$(D��$��DŽ$H��A�GH��$DŽ$������D�D$T)�9�$(H����$AL�H��t7��Mc���J����Hc�H��H�|�H��H9�r@�����H��H9�v��A��A�WD	�fD�\$j���|$@u,���f�|$juH�|$H��t�G�[	��D$lA�����H�D$x���GA�@��A�ӉT$`����A�Ӄ���$�A����$�H�A��H�}�E�ց�H��D�d$<L��$�L��$�I��H�|$X�L$H�D$HH�D$ H���U�D$L������H��$��D$H��H�����H9�r5�cHc�$H��H)�H��H9�w��$0@83�IH��H9�v0��$��t�E1�I�L$,H����y����uH��$�H��H9�wЋD$<���A����2�D$l���ZH9��=H��$8�E�A9�u�#�E�D9��H��H9�u�����n	=����DŽ$�
DŽ$��$0�����H��H9��L�����$��uaHc�$H��H)�H��H9�w��$08�H�CI9�v��K����t�@�����u�H��I9�u�L����H��I�L$,A�H���x��������H��$��f�|$j��H�L$xH���\fD��$�����H�t$H��t�FtL��H)�9F(�n�L$`���tqL��H)�H=�c1�A�����H�H9D$XsP��$�����I9��,�H�pH�t$X9�t"H�D$X�t$`�H��I9���H�P9�u�H�D$X�H��M��H��H��$�H��H��$�HDŽ$�jjjL�L$PH��$��:a��H�� D��$hE��tH�|$ ��=�������=���um�L$T��tH�>_!H��$��|$S���H�D$ �D$����H��������$(�D$������H�t$(L)�H��t$8�p���@=���t,������D$�D$S�r����=���t��u�D�T$H�]E���~D�L$E��tA��$H��$�����Hc�$H)�H��H9�w��$08M��fDD�D$@E���nI9��e�{�
u	I9���H��$H��H�������H�D$HH�H9������DH���F�����H9�w������������$18C��������������$18C���������DŽ$��������$��|$`����H��$����DŽ$l���DŽ$l����H9�����EA9�u����D�ED9�����H��H9�u����E�GA��fA���/A���D$lH�D$x���H9������D�D$dI�L$,H����t����������D$T��������~���H��$xH�D$pH�H9��:����D$����H9��%�����$����H9��EH�EH9�v/�U����t�������	H��H9�u�H��H�ŀ}�
������$���������H9������1��}
��H����D�t$8L��D�D$<L�L$0D��D�\$ �L$�s�����`D�D$<�L$D�\$ L�L$0E������E9������E��������<�������D$�����T���@DŽ$�e���H��$�H�L$ �	���DŽ$�C����;
�+���H��$��@ ������$����v��$�����H����I9��y����E��<�t�i���@���<��W���H��I9�u�L���F���L��$8�D$lH�D$xA�<8A�������|$T��t=��$(H��$����$(9�$�~DŽ$HH�{Z!H����$H1���u��$��������$(��H��$xH��$��D$H�t$(H)ʉH��$�H)ʉV���H9�������EH�ƒ�H�����H�L$x��s(�}���D�EH�ƒ�H��������Z���H��H9�u��L���H���D$lA�����H�|$x���H9�r)�&���fDH9��oH��H9��
���H��$x��$��u�Hc�$H�H9�rЉ���$0��Hc�@8tu���������$18Du����I9��H���H�H�H�L$X9�$��5���9T$`�+���H�D$X�t$`��$��9�����H��I9�������H�P9�u�����������$18E�Y�������@Hc�$H�H9���������$0��Hc�@8t��������������$18D�������f.�H��$xH�����I�L$,A�H���o�������������E1�I�L$,H���po�����y������DŽ$�
����D$������H�D$(H�sH�x��$(�P�Hc�H���.o�������D$�����d��D$��W��D$�����J��D$��=��D$�����0��D$�����#��Fn��fD��AVAUATUSH��pdH�%(H�D$h1�H���'I��H���H��A��1�H��t�ua�?ERCPue�����A��w'H�
�JD��Hc�H�>��f��Gf����A�E1�H�t$hdH34%(��H��p[]A\A]A^�fD�?ERCPH�^t�L�t$0H��H��L���im��H��H����H��IE��p���������H��t��Ct��C(��G%z�I�ED���u���@�GI�E1��c���f��GA�E1��R�����GA�E1��B�����G����G�&���H��t�C1ۨtH�]H��I�]1������������G����G����1�H��t��DH�9U!I�E1����fD�GA�E1����f��GH�1�I�}���fD�G�Ѓ�A�E1��u���@�Gf����A�E1��[���f.��GA�E1��B������������+���������'��������������k�����AW�AVAUI��1�ATUSH��H��8dH�%(H�D$(1�H�L$��k���Ņ�u&�D$����H�L$�1�H���k���Ņ�t+H�T$(dH3%(����H��8[]A\A]A^A_�fDH�L$ �	1�H���|k���Ņ�u��D$L�|$ D�t$�D$D9�}2F�d5�\$L��A�A��Hc�L�H�s�-k����t)~A�l$D9�|ν�����h���D�d$E���fD�+�C����D����j��f.���AWAVAUATUH��1�SH��H��HH�T$�H�L$H�L$,dH�%(H�D$81��j��A�ą�u'�D$,����H�L$(�1�H���j��A�ą�t/H�|$8dH3<%(D���?H��H[]A\A]A^A_�f�H�L$0�	1�H���<j��A�ą�u�D�|$,�D$(L�l$0D�|$�D$E9�}1C�<�\$H�����A��Hc�L�H�s��i����t+~E�fE9�|�A������^���DD�t$,E���fDI9���HcD$�I��H��H)�H��I��I���	�M�M9�sK�4<H���|i����t�D$�L$����H�L�I��H9�v%Hc�I��L�h�	�L�I9�vJ�4+H���<i����t�H�D$L� H�D$D�d$H����I�����h��@SH��H�� dH�%(H�D$1��G
u
�G��H�L$H�T$�h����~oH�T$H�t$D�D�JH9�wFA��A������<	Hc��<���yZHc��fD��J��ȍHcɋ���yH�H9�v�A����A��A��D�H�\$dH3%(uH�� [��h��������h����USH����xR9�}N�Hcɋ��\�)�D9�}+Hc�Hc�H�H��L���Wh���(H����[]�f.��������f��������f���AVAUM��ATA��UH��L��SH��D�t$0�����~'E��M��D��H��H��[��]A\A]A^�g��f�[]A\A]A^����AWI��AVAUATUHc�S�\-H��H��O!H�������I��I��H����L�L�f.�D�@H��B�|	+x�I9�u�Hc���I��H���~M�7I�L�I��I�l�f�Icu�A�]H��I��I��)�L�Hc�H���
g��H�I�F�H�K�I9�u�H�E1�H��[]A\A]A^A_Ð���H��H��tI�/�Ҹ������D��H�=O!� ��AVAUATUS��x`9�}\�H��N!I��L��Hc�L�4��\�A+�{Hc��H��t9Ic6Lc�H��L��L��^f��B�(H�E��[]A\A]A^�f.�������������f���AUM��ATA��UH��L��SH��H�������~"H��M��D��H��H��[��]A\A]�e��DH��[]A\A]�f.���H�MN!� ��USH��HdH�%(H�D$81�H��t|�?ERCPH��H��uSH��t�G%z��EH��t�G�t&�G��GH�L$8dH3%(u>H��H[]�D���������ѐH��1�1��d��H��H��u������빸������d��f���H�M!AV�@AUATUS�H��H��tn�Ie��I��1�@I�$���TH��H=u��4e��1�I���%D�c��H�����H��H��t$I�E�DXu�I�$����[H��]A\A]A^�L��1�H��HDžH��L��A�HDž8H)���@���H�1�I�E��I�EB��� t���D�����H���AI�EB���t!���D������� ��H�AI�EB�f��y#���D��������H�AI�EB���@t#���D��������H�AI�EB���t#���D�������H�AI�EB���t���D����� ��H�AI�EH��H�����x��L�?��t ����D�������@��H�AI�E�x��t!���D�������`��H�AI�EB���t!���D���������H�AI�EB��t���D��������H�AH��_�_��������S���E1�L�%�>�>�A��_tmL����a�����H��E�B��5@I��I�������I�EB�p�D����f��
��I��D���SE���SE���SEڨt�������SI��D�E��tVA���~MD��H�
�I!L�
J!��?��[E#��A���t%L�W1��A���I����?��A	�9�u�tTA��
��~(A���tO|IA��( A��w<A��[�f�1�A��
|�A�[�f.�A��
t�A��
t1�[�1�E��[����A���H��H9�sG�
�E�A�3[�f�H���H9�s1��
����A�[��@�뾐��SI��H�G�D�O�E��tqD��D�Ƀ�@���uH����σ�@���t�D�ɀ��vDH�=^H!��?L��H!��[E#��A���tL�P1�f�A�:��I����?��A	�9�u�tdA��
~.A���tu|_A��( A��wR�A�[��A��}:A��
u0�H9�v1ɀx�
�����A�[�@A��
t2A��
t1�[øA�[�f�1�E��[����A���H9�v�x�
�E�A�3[�D�����H��G!�����L�
tG!A;9~wD�@��I���H�JA9|��}
H�ʉ�I9�u�D�B�Lc�L�I��I�H�����H������?�ɀ�JL9�u�ƒ���Hc�H�H�]G!B
<�@�>�DH�IG!E1ɸB
<�@�>Ð��H��t0���G��9�~1�1�f�W�f��=��O‰���f�Wø����ÐAW1�AVAUATA��USH��(A����H�t$�D$��T$<_�#H�_<b��E1�E1�A�����H�-�:f���<rwI��HcT�H�>���Cf����H����Tt�H��E!Hc��H����<rv��H��E!�H��D�Cf����H�\��C!�H����x�H���3u����A�����Y�C"H��"�H�����T$H�t$H����������A�H�ڐ�Bf����H€:Tt�H�Z�B����H��H�XA���@���@E��xE���*E9��!<T�$�CE��H��E1�E1�����Cf����H؀8T�Y���H�X�@���@H��D!�H�f.��Bf����H€:Tt�H�Z�B�P����{f����H|$�;H����Bf����H€:Tt�H9��DH9��;A��CH������D�\$A���{E1��CH�s�H������H���3�F�����CH��f����D���E����@�CA����<H���H�\��h�����CH�Sf����A�E���U�C<��IH�
:C!��?�H��� ���H�IC!�H�E��t�C�<�vH�C!��?�H�@������Cf����A��C��<H���H�\����fD�C��H�S<H��B!HB��H�����f�A��H�SE��t�C<��.����CH���[����CA��H���G����H�C�:����E���U�CA��H������fD�C�[����A��H��!���CH�����@E�H������D�T$H�t$�b���A����f.�E��<T����D��H��([]A\A]A^A_��C"H��&f����A���v���fD�CH���c����S�t$D�\$H�|$f�����UY��D�\$H��t<H����Qf����Hр9Tt�H9�v-H9�s(A��-���H�_���H��(�����[]A\A]A^A_ËT$H�t$H���z���A�����H��(�����[]A\A]A^A_�@f.�I�ʉ��A�����E��A��D��t$I����0tI��0�ȃ���A����D�f.�AWE1�L�=�6AVM��AUA��ATI��U��SH��H���?_�D$A��O�TM�A���<lw8��Ic�L�>��A�Bf����I�A�:Tt�I��A���<lv�@�D$�D$H��[]A\A]A^A_�DI��A�B��<��H�=�7��Hc�H�>���I��A�rI�NM�F��H������DA�D$A�f����I�A�<$T�)����r���f.���I�V��`���H��H=�u�I����@�D$�fD��I�V��`���H��H=�u��h������I�V����`���H��H=�u��6���fD1��@	�������H��I�V�4�H��u܉с�	�@�s��@1��@��	ʈ������H��I�V��H��uځ���	ʈS���@I�V�T�H��H��`u����D�@I�V���T�H��H��`u��c���A�jI��������I������I�rE���t1�f��H��H�� u�A�B!�H���������H��3�.�����A�B"A�R#�������I��&�����A�rI�NM�F��H��L�T$�d���L�T$I�BE����A�RI�€������H��<!��?�I����I��@A�Bf����I�A�:Tt�I������f�I��M��D���L��H��L�T$�4���L�T$��������A�Bf����I�A�:Tt�I���s���f�I�rE�������1���H��H��u︀f.����HcҀ<t���%�����?��S��=u��}���@L��M��D���H��L�T$���L�T$�������Z���f.�A�Bf����I�A�:Tt����fD�D$���A�rI�NM�F��H��L�T$����L�T$I�BE��t&A�RI�€���2����_���I���$����I�������I��"����������K��C����f�C�C�����@DI�V���T�H��H��`u�I������@f.�I�V�T�H��H��`u�I�����1��D��	ʈ���{���H��I�V��H��uځ���	ʈS��1���	����=���H��I�V�4�H��u܉с�	�@�s�ڸ��I�V����`���H��H=�u�I������f���AVAUATI��USH���dH�%(H��$�1�H�H���O�?ERCPH���@A�����u�G�oA��W���GHc�H�H������G
uH�G H�D$H���[f�H�D$H��E1�H��H��$�)�$�H�L$H��L�D$H@H�L$ �у���H�D$(��)�$�����S��A��A��H��H����Å�y1�E��uDH��8!�\�H����H�P0H�H�P�@0,E��uT�@4��x�H4�XXH��$�dH3%(��H���[]A\A]A^�H��0I�$1���f�fo�$��@4fo�$�H8PH�DH��0H�1��f.�H�5�0I�4$�n����H�L$1��O��H�D$�S����]O��f.�H��H��H����H	��@f.���1��?PCRE��H���oI��I���oON�oW V �GȉF�G�ȉF���f�FA�x���f�FA�x�v���f�FA�x�h���f�FA�x�Z���f�FA�x�L���f�FA�x�>���f�FA�x�0���f�FH��M��t:�Ao�AoaaI�Q H�Q A�Q(�Q(A�ʉA�QʉQA�Q(ʉQ(H���D�f.�D���V��������
H��D�E��A����A�����S�L��H�5n6!��?�9������~)�H�pA���P�Ӄ����(������������H��������u5H����)�ʅ��A�S�E����D�E��A��~]A����`���)�[�@�?�����t4H��H���8u�H)��ƍP�����A��H���R�E�������������f�H��A����A��>t_H���j����A���tGA���Cu���w<��%���DA���t/A���u���w�������0uĉ�[)��D�� u���f������[�)��fD��H�%�@���������������H���WI����Hc����������H�V!f���D�OL����DI����D�A��A��Mc�A)��
����AUATUS@�r������*L�K4!H��4!G�A��E�Mc�B�CL�4!M��@���eIc4�L�>��f�H�V���T���H�V!�K���A�X8Z@�ƃ���@8�uhH���
���b�����[]A\A]��A�XL��3!�rA94��f.�E�@A�p�@��@��A��A��D	ƃ���@8�u���[]��A\A]�D��u���A��d����D��H��A���OA9��Y����@D��H�ZA�����H�S��ف���~A9�� ���9������w���H�
�2!��?H�3!�,1�LmD#�D�e��A���t�1�H�ӐH���s�����?��A	�9�u�J�T"�`����H�V�����H�5Y2!��?H�-�2!D�$C�dB#\�E�l$����E���Q���E1�H�ՐH���u�����?��	�D9�u�J�T*�)����H��1!��?�,2H�5;2!�LmD#�D�e��A�������1�H��DH���V�����?��A	�9�u�J�\#�����Ѓ�Ã��1������H��H���L���K���K���K���K���K���K���K���K���K���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M��~M���M���M���M���L���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M���M��iL���P���O���O���O���O���O���P���P���P���P���P���P���P���P��|P��DR��DR���O���O���O���O���O���O���O���O���O���O���O��R��R��R���O���O���O���O���O���O���O���O���Q���O���O���O���O���O���O���O���O���O���O���O���O���O���O���O���O���O���O���O���O���O���O���O���O���O���Q���O���O���O���O���O���O���O���O���O���O���O���O��P��P��P���O��,Q���O���P���P���P���P���O���O���O���O���O���P���P���P���P���O���O���O���O���O���O���O���O���R���R���R���R���R���R���R���R���R���R���R���Q���Q���Q���Q���Q���R���Q���Q���Q���Q���Q���R���R���R���S���S���R���R���S���S��HS��HS���R���S���R���S��HS���Q���Q���R���R���Q���Q���Q���Q���R���Q���R���Q���Q��xS��xS���R���R��xS��xS���S���S���R��xS���R��xS���S���Q���Q���Q���Q���Q���Q���Q���Q���S���S���R���Q���Q���Q���Q���Q���Q���Q��$V��$V��pV��pV��pV��$V��$V��pV��pV��pV��pV�� 
V��pV��$V��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��pV��PV��pV��pV��pV��pV��PV��PV��PV��PV��[^��aa���a��>a���`���`��|^��|^��|^��|^��|^��|^���`���`��~`��~`��|^��|^��|^��|^��|^��|^��`���_��A_���b���b���b��vb��fb��Eb��^��^��^��^��^��^��8b���a���b���a��^��^��^��^��^��^��Ha��Ha��Ld��#d���c���c���d��rd���]���]���]���]���]���]���d���d���c���c���u��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hq��hv��hv��hv���o���u��Pn���u��hv��hv��Hu��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��u��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��(t��s��xr��Pr��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv��hv���q���u������$�������$���t���$���d���$���$���$���$���$���$���T���$���D���$���4���$���$���$���$���$���$���$���$������$������$����$����������ǡ����(���ˈ����������������������������������������������������������ՠ���¢�����e�����������������������W����C��������������������������������������������	���׬��v���	���N}��$�������ף��������������������������������������������������������������������������������������	�����������������������������������������������������������������������������������������������������������������������������������������ߣ����������������������no error\ at end of pattern\c at end of patternunrecognized character follows \numbers out of order in {} quantifiernumber too big in {} quantifiermissing terminating ] for character classinvalid escape sequence in character classrange out of order in character classnothing to repeatoperand of unlimited repeat could match the empty stringinternal error: unexpected repeatunrecognized character after (? 
or (?-POSIX named classes are supported only within a classmissing )reference to non-existent subpatternerroffset passed as NULLunknown option bit(s) setmissing ) after commentparentheses nested too deeplyregular expression is too largefailed to get memoryunmatched parenthesesinternal error: code overflowunrecognized character after (?<lookbehind assertion is not fixed lengthmalformed number or name after (?(conditional group contains more than two branchesassertion expected after (?((?R or (?[+-]digits must be followed by )unknown POSIX class namePOSIX collating elements are not supportedthis version of PCRE is not compiled with PCRE_UTF8 supportspare errorcharacter value in \x{...} sequence is too largeinvalid condition (?(0)\C not allowed in lookbehind assertionPCRE does not support \L, \l, \N, \U, or \unumber after (?C is > 255closing ) for (?C expectedrecursive call could loop indefinitelyunrecognized character after (?Psyntax error in subpattern name (missing terminator)two named subpatterns have the same nameinvalid UTF-8 stringsupport for \P, \p, and \X has not been compiledmalformed \P or \p sequenceunknown property name after \P or \psubpattern name is too long (maximum 32 characters)too many named subpatterns (maximum 10000)repeated subpattern is too longoctal value is greater than \377 (not in UTF-8 mode)internal error: overran compiling workspaceinternal error: previously-checked referenced subpattern not foundDEFINE group contains more than one branchrepeating a DEFINE group is not allowedinconsistent NEWLINE options\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain numbera numbered reference must not be zero(*VERB) with an argument is not supported(*VERB) not recognizednumber is too bigsubpattern name expecteddigit expected after (?+] is an invalid data character in JavaScript compatibility modedifferent names for subpatterns of the same number are not allowed�@���������`���������� ���� 
����@��������������������������� ����alphalowerupperalnumasciiblankcntrldigitgraphprintpunctspacewordxdigitpnooklmACCEPTCOMMITFFAILPRUNESKIPTHEN:;<=>?@�������������������[\]^_`������
�
��	���Q\E{0,DEFINEUTF8)CR)ANY)BSR_ANYCRLF)BSR_UNICODE)Error text not found (please report)����й����й�������������������D���n���n���b���V���V���J���J���z��� �������� ���+��+�����D�����D�����D�������� ���M��M�����}�����$��������O�������~��������9�����'��'��m��m��������������'��m�������'��'��m��m��������������'��m�������������A��A��������W��W�������A�����W�� ��� ��� ��� ��� ��� ��� ��� ������������ ������������������������� ������3��������3�������� ��� ��� ��� ��� ���(�(� ��� ��� ��� ���� ��� ������������ ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� ��� �������;��;�������������������;�������� ��� ��� ��� ��� ��� ��� �����R�R�������&��&��&���R����&�� ��� ��� ��� ��� ��� ��� ���������������:��:��:��������:�� ��� ��� ��� ��� ��� ��� ���w��w�����������������������w����������� ��� ��� ��� ��� ��� ��� ���������|��|��v���v������������������|��v�������������j�����]�9�������������!�!���������^�^�����v�v����|��\�����|��|��$�������|�����|��,��t����������l�������,��L&���$��4$��$��4&���%��l �� 
��������<��<��<��<��<��<���������)���)��,*��*��\*���&���&���&���&���&���&���#���#��L"���*���*���*��l*��L+��L+��L+��L+��L+��L+��+��+���!��,!��t3��L3��.���3���3���3���3���3���3���3���3��t,��t,��\0��L.���1���&��,��������������D�����D���/���.��d,���+��d��d,���+��d���3���3���3���3���3�����4(���'��D)��� ���(���������(�����8���7��8���7��8��5��4���2���1��6��M;���:��_:���9���=��=��\<���;��:>��P?��P?���=��]>���8���>���9���C��G��	F��E��(D��L��dK���J��DO��~N���M���L���O���I���>���>��gI���H���H��H��H���V���V��2V��tU���R���U���T��nT��nT���>���>��DX���W���W��OW���T��<^���]��l]��]���\��4\���[��$[��$[��|>��|>���Z��0Z���Y��IY���X��U�������ȇ�����҉��
���������0������ْ��s���s���s���(>��(>������������������_�������Ǡ�����3�������������������=���=��V���ߛ��I���(������Ȣ������������������ӛ���������=���=��������������ʚ����������$���4���L���t���$�������Զ���������������d���Ե��\*+?{^.$|()[������������������t��t��t��t��t��t��t��t��D��4��4����t��t��t��t��t��������������������������T��T��������T��T��T��T����T�����T��T�����T�����������T��T��T����T�����T�����d��d��������d��d�����������d�����d�����������������������������������������$��������������������������������4��4��4�����4��4��������������������������������������������������������H�����������������������������������������������������������������������������x��x����������x�����������������������������������������������X��X�����������������������������������������������������H���������������������������������������������������������������������������������������������������������������C���������������argument is not a compiled regular expressionunknown or incorrect option bit(s) setfailed to get memory�M=%N+3<DMSUiBpsvC{���	�
��>���
��O����%-4=PNXQnR�S�T���D����� �!�E�"�#�U�����	�FG	$
V&)%3&;=
@'L?PSV(\He)p*|W�X�+�,������@�A����I�-��Y�J��./	K0)10283A4HZQ[Z5`6g7n8s9{:�;�L�<����AnyArabicArmenianAvestanBalineseBamumBengaliBopomofoBrailleBugineseBuhidCCanadian_AboriginalCarianCcCfChamCherokeeCnCoCommonCopticCsCuneiformCypriotCyrillicDeseretDevanagariEgyptian_HieroglyphsEthiopicGeorgianGlagoliticGothicGreekGujaratiGurmukhiHanHangulHanunooHebrewHiraganaImperial_AramaicInheritedInscriptional_PahlaviInscriptional_ParthianJavaneseKaithiKannadaKatakanaKayah_LiKharoshthiKhmerLL&LaoLatinLepchaLimbuLinear_BLisuLlLmLoLtLuLycianLydianMMalayalamMcMeMeetei_MayekMnMongolianMyanmarNNdNew_Tai_LueNkoNlNoOghamOl_ChikiOld_ItalicOld_PersianOld_South_ArabianOld_TurkicOriyaOsmanyaPPcPdPePfPhags_PaPhoenicianPiPoPsRejangRunicSSamaritanSaurashtraScShavianSinhalaSkSmSoSundaneseSyloti_NagriSyriacTagalogTagbanwaTai_LeTai_ThamTai_VietTamilTeluguThaanaThaiTibetanTifinaghUgariticVaiYiZZlZpZs�����������������!!																										











																														 !"#$%&$'()***++,-.----/01/01/012/0134567899:;<=>?@ABBCDBEFGHGIJKLMMMNOOPQ-RRRRRRRRRSSSSSSSSS



SSSSSSSSSSSS













RRRRR






S
S
















TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTVWVWSXVWYYZ[[[YYYYYX
\]]]Y^Y__`aaaaaaaaaaaaaaaaaYaaaaaaaaabccc`dddddddddddddddddedddddddddfgghijkkklmnVWVWVWVWVWopopopopopopopqrs`tuvVWwVW`xxxyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{{||||||||||||||||}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~��TT���}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~�}~}~}~}~}~}~}~�}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~YYYYYYYYYYY��������������������������������������YY�������Y���������������������������������������Y�YYYYYY�������������������������������������������������������YYYYYYYY���������������������������YYYYY�����YYYYYYYYYYYYY��������������������YY�Y�������������������������������S����������TTTTTTTTTTT���������Y������T������������������������������������������������������������������������������������������������������������������������������������������������������������Y������������������������������������������������������������YY�����������������������������������������������������������������������������������������������������YYYYYYYYYYYYYY�����������������������������������������������������������YYYYY����������������������������������������������YY���������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY����������������������������������������������������������YY�������������������Y�TT���YY������������������������YYYYYY�������Y���Y��������YY��YY����������������������Y�������Y�YYY����YY���������YY��YY����YYYYYYYY�YYYY��Y�����YY����������������������YYYYY���Y������YYYY��YY����������������������Y�������Y��Y��Y��YY�Y�����YYYY��YY���YYY�YYYYYYY����Y�YYYYYYY����������������YYYYYYYYYYY���Y���������Y���Y����������������������Y�������Y��Y�����YY����������Y���Y���YY�YYYYYYYYYYYYYYY����YY����������Y�YYYYYYYYYYYYYYY���Y��������YY��YY����������������������Y�������Y��Y�����YY���������YY��YY���YYYYYYYY��YYYY��Y�����YY��������
����YYYYYYYYYYYYYYYY��Y������YYY���Y����YYY��Y�Y��YYY��YYY���YYY������������YYYY�����YYY���Y����YY�YYYYYY�YYYYYYYYYYYYYY���������������������YYYYYY���Y��������Y���Y�����������������������Y����������Y�����YYY��������Y���Y����YYYYYYY��Y��YYYYYY����YY����������YYYYYYYY��������YY��Y��������Y���Y�����������������������Y����������Y�����YY���������Y���Y����YYYYYYY��YYYYYYY�Y����YY����������Y

YYYYYYYYYYYYYYY��Y��������Y���Y�����������������������Y����������������YYY��������Y���Y����YYYYYYYYY�YYYYYYYY����YY����������������YYY�������YY��Y������������������YYY������������������������Y���������Y�YY�������YYY�YYYY������Y�Y��������YYYYYYYYYYYYYYYYYY���YYYYYYYYYYYY����������������������������������������������������������YYYY����������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY��Y�YY��Y�YY�YYYYYY����Y�������Y���Y�Y�YY��Y�������������Y���YY�����Y�Y������YY����������YY��YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY������������������������������������������������������������������������Y������������������������������������YYYY���������������������������YYYY��������Y������������������������������������Y���������������Y�������



YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������YYYYYYYYYY��������������������������������������������YYY���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Y����YY�������Y�Y����YY�����������������������������������������Y����YY���������������������������������Y����YY�������Y�Y����YY���������������Y���������������������������������������������������������Y����YY�������������������������������������������������������������������YYYY������������������������������YYY��������������������������YYYYYY�������������������������������������������������������������������������������������YYYYYYYYYYY����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYY																		


YYYYYYYYYYYYYYYYYYYYY












Y


YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY����������������������������������������������������������������������YYYYYYYYYYYYY   !!!!  !!!YYYY!! !!!!!!   YYYY"YYY##$$$$$$$$$$%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%YY%%%%%YYYYYYYYYYY&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&YYYY'''''''''''''''''&&&&&&&''YYYYYY(((((((((((YYY))********************************+++++++++++++++++++++++,,---YY../////////////////////////////////////////////////////0101111111Y10100111111110000001111111111YY12222222222YYYYYY2222222222YYYYYY33333334333333YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY5555677777777777777777777777777777777777777777777777565555565666665667777777YYYY88888888889999999::::::::::555555555:::::::::YYY;;<==============================<;;;;<<;;<YYY==>>>>>>>>>>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY????????????????????????????????????@@@@@@@@AAAAAAAA@@AAYYYBBBBBCCCCCCCCCCYYY???DDDDDDDDDDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEFFFFFFGGYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYTTTTTTTTTTTTTTTTHTTTTTTTIIIITIIIIHYYYYYYYYYYYYY`````JRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRZZZZZ`````KLMRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRZTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTYYYYYYYYYYYYYYYYYYYYYYTTTNOPPPPPPPPQQQQQQQQPPPPPPYYQQQQQQYYPPPPPPPPQQQQQQQQPPPPPPPPQQQQQQQQPPPPPPYYQQQQQQYY`P`P`P`PYQYQYQYQPPPPPPPPQQQQQQQQRRSSSSTTUUVVWWYYPPPPPPPPXXXXXXXXPPPPPPPPXXXXXXXXPPPPPPPPXXXXXXXXPP`Y`Y``QQZZ[X\XXX`Y`Y``]]]][XXXPP``YY``QQ^^YXXXPP```s``QQ__wXXXYY`Y`Y````aa[XXYbbcdYYYYYRYYRYRRRRRYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYTTTTTTTTTTTTTeeeeTeeeTTTTTTTTTTTTYYYYYYYYYYYYYYY

f



f

gfffggfffg
f


fffff





f
h
f
ijff
gffkfgIIIIg

ggfffgggg


l
mmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnoooooooYYYYYY





















































































































































































































































































YYYYYYYYYYYYYYYYYYYYYYY






































YYYYYYYYYYYYYYYYYYYYYYYYY










YYYYYYYYYYYYYYYYYYYYY

























ppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqq


































































































































































































































































































































































































































































Y


















Y
YYYY























Y



Y



YY



























Y


































Y
Y



YYY








YY







YYY























Y













YYYYYrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr

















































YYY









YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYsssssssssssssssssssssssssssssssssssssssssssssssYtttttttttttttttttttttttttttttttttttttttttttttttYuvwxyz{|}R~~opopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopopop������opop���YYYYYYY���������������������������������������������YYYYYYYYYY������������������������������������������������������YYYYYYYYY�YYYYYYYYYYYYYYYY�����������������������YYYYYYYYY�������Y�������Y�������Y�������Y�������Y�������Y�������Y�������Y��������������������������������SYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY��������������������������Y�����������������������������������������������������������������������������������������YYYYYYYYYYYY����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYY











YYYY
�I�


���������TTTTTTSSSSS

����I

Y��������������������������������������������������������������������������������������YYTT

���������������������������������������������������������������������������������������������S���YYYYY�����������������������������������������YYY����������������������������������������������������������������������������������������������Y











������������������������YYYYYYYY



































YYYYYYYYYYYY�����������������������������������������������Y






































�������������������������������























































�����������������������������������������������Y����������������������������������������������������������������������������������������







































��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������YYYYYYYYYY































































����������������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������YYY�������������������������������������������������������YYYYYYYYY����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYY}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~}~YY}~}~}~}~}~}~������YYYYYYYY���K}~}~}~}~}~}~}~}~}~}~}~}~YYYYYYYY����������������������������������������������������������������������������������������YYYYYYYY






















SSSSSSSSS

R�S

YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY-----��������������������������������������������YYYY


YYYYYY��������������������������������������������������������YYYYYYYY���������������������������������������������������������������������YYYYYYYYY������������YYYYYY����������������������������YYYY������������������������������������������������������������������������������������YYYYYYYYYYY������������������������������YYY������������������������������������������������������������������������������Y�����������YYYY��YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY�������������������������������������������������������YYYYYYYYY��������������YY����������YY��������������������������������YYYY�������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYY�����YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY����������������������������������������������YY����������YYYYYY������������������������������������YYYYYYYYYYYY�����������������������YYYY�������������������������������������������������YYYY��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������YY��������������������������������������������������������������YY����������������������������������������������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY�����YYYYY��������������������������Y�����Y�Y��Y��Y������������������������������������������������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������YYYYYYYYYYYYYYYY����������������������������������������������������������������YY�
�����������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY�������������
YYTTTTTTTTTTTTTTTTYYYYYYTTTTTTTYYYYYYYYYYYYYYY�����Y���������������������������������������������������������������������������������������������������������������������������������������YYY																										

����������S���������������������������������������������SS�������������������������������YYY������YY������YY������YY���YYY

Y


YYYYYYYYYY

YY������������Y��������������������������Y�������������������Y��Y���������������YY��������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY���������������������������������������������������������������������������������������������������������������������������YYYYY
YYYYYYY








���������������������������������������������������������������������������YYYYY











YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY












































TYY�����������������������������YYY�������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY�������������������������������Y����YYYYYYYYYYYY���������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY������������������������������Y�������������������������������������YYYY��������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY��������������������������������������������������������������������������������������������������������������������������������������������������������������YY����������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY������YY�Y��������������������������������������������Y��YYY�YY�����������������������Y���������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY����������������������������YYY���������������������������YYYYY�YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY����Y��YYYYY��������Y���Y���������������������������YYYY���YYYY���������YYYYYYYY���������YYYYYYY��������������������������������������������������������������������������������������YYY�����������������������������YY���������������������������YYYYY���������������������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY�������������������������������Y������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY





















































































































YYYYYYYYYY






































YY



























































HHTTT


HHHHHHTTTTTTTT

TTTTTTT





























TTTT















































YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY�������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY






















































































YYYYYYYYYYYYYYYYYYYYYYYffffffffffffffffffffffffffggggggggggggggggggggggggggffffffffffffffffffffffffffgggggggYggggggggggggggggggffffffffffffffffffffffffffggggggggggggggggggggggggggfYffYYfYYffYYffffYffffffffggggYgYgggggggYgggggggggggffffffffffffffffffffffffffggggggggggggggggggggggggggffYffffYYffffffffYfffffffYggggggggggggggggggggggggggffYffffYfffffYfYYYfffffffYggggggggggggggggggggggggggffffffffffffffffffffffffffggggggggggggggggggggggggggffffffffffffffffffffffffffggggggggggggggggggggggggggffffffffffffffffffffffffffggggggggggggggggggggggggggffffffffffffffffffffffffffggggggggggggggggggggggggggffffffffffffffffffffffffffggggggggggggggggggggggggggffffffffffffffffffffffffffggggggggggggggggggggggggggggYYfffffffffffffffffffffffffgggggggggggggggggggggggggggggggfffffffffffffffffffffffffgggggggggggggggggggggggggggggggfffffffffffffffffffffffffgggggggggggggggggggggggggggggggfffffffffffffffffffffffffgggggggggggggggggggggggggggggggfffffffffffffffffffffffffgggggggggggggggggggggggggggggggfgYY











































YYYY



































































































YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY






























YY
YYYYYYYYYYY
Y
YY
YYY
YYY




YYYYYYYY
YYYYYYY
YYYYYYYYYYYYYYYYYYYYYYYYY
Y

YY
YYYYYYYYYY



YY
YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY

































YYYYYYYYYYYYYY








YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY���������������������������������������������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY�����������������������������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY������������������������������YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTYYYYYYYYYYYYYYYY������������������������������������������������������������������������������������������������������������������������������YY	

 !""#$%&'((()*+,-./0123456789:;<=>?@AABCDEFGHIJKLLAMAANOPQRSTUVWXYZ[\]F^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^_^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^`abbbbbbbbcddefghijklmno"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""pqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr^^stuvwwxyz{|}~�������������������������������F������������������^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^�^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^�^^^^����rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr�rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr�									
!	 		!��	!				�	!y!	!����!	9���!���!	����!���!�!	�!	�!	�!	O!	�!	�!	�!a!	�!	�!�!	�!�!	�!	�!	�!	�!!8!	!����!����!����!	����!	���!	~���!	+*!	]���!	(*!?*!	=���!	E!	G!*!*!*!.���!2���!3���!6���!5���!1���!/���!-���!�)!�)!+���!*���!�)!&���!����!'���!����!%���!	T	����	�	&	%	@	?	 ������������������	������	����������
	
������������	�������	����	~���	P	 ������	����	�	0���
111177?
?????YYYY







++
++
+555
5
5556
666
66

$
$$$
$$/
///88888
    
99999
9999
&&
&&
&&	`((((--2233

%%%%%
%%"""
"""
4''
'
'
ZZ
ZZ
ZZ==
==
==KK
KK
EE
EEE
H
HHH	
	!�!�!���!	A�	����JVd�p~����		����������	����	����	����	����	����						��!	A��!	���!	!��!!�!		��	0���!		��!	�!	��!���!���!	���!	��!	���!	���!	���




��::<<<UUULLLL
NNNN!	�u��000
0@@J
JJJJ
D
DDDIII
ISS
SSSS
CCC
C
C&[[[[VV
VVV
		#FB));;***
	(
���.,,
PPPAAAGGWWWMMRRQQXTT
TTT>>>O8.02 2010-03-19h�0����p�	

 !"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\]^_`abcdefghijklmnopqrstuvwxyz{|}~�����������������������������������������������������������������������������������������������������������������������������	

 !"#$%&'()*+,-./0123456789:;<=>?@abcdefghijklmnopqrstuvwxyz[\]^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~�����������������������������������������������������������������������������������������������������������������������������>�~~�����������������������������������������x������������������;�2�1���03����5����:��H�;��\�>����A��$�B����D���E��@�J���pK����U��$�W��pPY����Z������x ���@���0����H���P�����������
p���
���0��|�������� �����d����������p��P���h0���`���������P���h�����������@�\`��������`�p� zRx�$�/���FJw�?:*3$"DH1���L\�3��E�B�E �A(�C0��
(A EBBGA
(D EBBA�`8���`�9���B�K�E �E(�D0�H8�HPK
8F0A(B BBBKy
8A0A(B BBBE`$�;��B�J�G �I(�A0�F8�DP�
8F0A(B BBBE�
8C0A(B BBBC\�d>��,B�E�E �H(�D0�A8�G@�
8C0A(B BBBFd8F0A(B BBBx�4?���B�B�E �E(�G0�C8�D@�
8A0A(B BBBDK
8C0A(B BBBA�8F0A(B BBB<d�@���B�E�E �D(�H0��
(A BBBAH�8A��8B�E�B �E(�D0�D8�Dp

8A0A(B BBBGH�,F���B�E�E �E(�A0�A8�G`x
8A0A(B BBBAH<�F��r
B�E�B �E(�D0�A8�Fp
8A0A(B BBBDH��P���B�B�B �B(�A0�A8�D��
8A0A(B BBBDD�R���B�H�L �E(�D0�J8�p
0A(B BBBF�S��t�0�T��8WB�B�B �B(�A0�A8�G���A�A�L�B�I�B�B�Y�r
8A0A(B BBBF�D�p�B��F�k�A�!�E�i�A�������F�B�B �B(�A0�A8�G� L�"\�"F�"N�"B�#N�#I�#E�#B�#Q�"��"��"D�"D�#G�#A�#B�#B�#x�"�
8A0A(B BBBE|�������q��p����QB�B�B �B(�A0�A8�G� L�@L�At
8A0A(B BBBA3�A[�AH�AB�AE�AI�A[�Aj�A^�AH�AB�AE�AI�A[�A��Aq�AH�AB�AE�AI�AN�AX#�A^�AH�AB�AE�AI�AV�Ahxd���F�B�B �E(�A0�A8�J�
8A0A(B BBBA!�H�B�H�H�D�b�(����#u��B�p�v
J������v�B�H�E �B(�A0�A8�G�0�X�A�D�W��
8A0A(B BBBF�J�F�A�W�>�L�H�B�N���L�H�B�N���X�B�D�\�q�U�B�B�\��X�H�D�W���J�B�D�W��X�H�D�W�q�U�B�D�\�m�X�H�D�W�f�R�H�D�W�h�L�B�G�W��N�B�D�W�S�K�E�A�S�G�T�A�B�W���J�B�A�W�i�N�B�D�W��J�B�D�W�$�J�F�A�W���J�B�A�W���J�F�D�W�s�J�B�D�W���N�B�D�W�C�J�B�D�W���N�B�D�]��	�J�B�D�W���K�B�D�W�5�K�B�D�W���J�B�D�W�O�J�B�D�\�!�J�B�D�\�}�J�B�D�W���J�B�D�W���J�B�D�W���J�B�D�W�V�J�B�D�W���J�B�D�W���N�B�D�W�t�N�B�D�W�u�N�B�D�W�z
�A�M�Ek�N�B�D�W�{�N�B�D�W���K�B�D�W�D�K�B�D�W���J�B�D�W�p�A�B�D�W�L�J�B�D�W���J�B�D�W�s�K�B�D�W�J�J�B�D�W��G�E�D�W�\�A�B�D�W�d�x���jF�B�B �B(�A0�A8�M��
8A0A(B BBBDL�l�B�B�V�DL����iF�B�B �A(�A0�D��
0A(A BBBGH�����&F�G�B �G(�A0�A8�Gp`
8A0A(B BBBGH������F�B�B �B(�A0�F8�G�r
8A0A(B BBBJ ,
����A�G0�
AA(P
����gE�A�D w
CAKH|
���YF�B�E �D(�G0�a
(C BBBNA(A BBBH�
����F�E�B �B(�A0�D8�H@�
8A0A(B BBBBȽ��
<(Ľ��~F�B�B �A(�A0�R
(A BBBKHh���SF�E�D �G(�G0M
(M CBBJD(A ABB����
,�����E�A�D`^
AAF<�����(M�G�B �A(�A0�y
(D BBBA8<����?E��
CU
KO
AF
R^
J^
J8x���OE��
Ik
EO
AM
CF
RZ
F������0��?x�X���B�D�B �B(�D0�A8�D`�
8A0A(B BBBD�
8F0A(B BBBA]8F0A(B BBB`���FHt���^B�L�E �E(�D0�C8�GPj
8A0A(B BBBFH����F�B�B �D(�A0�G�f
0A(A BBBD��� ���V�,<����y��E�H�x
HVA�lP��|�H��I��B�A �A(�d����X(����j
 ABBIJ
 AEBF�����P(���������J(����GNU��`x�"�x
��h�"p�"���o(0
�
Op�"ph�	���o���o���o�o����o��"����� 0@P`p�������� 0@GA$3a1��GA$3a1x�GA$3a1����GA$3a1��
GA$3p864�T�GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864��
GA$3h864��
GA$3p864`�ѠGA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864��
GA$3h864��
GA$3p864�b�GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864��
GA$3h864��
GA$3p864p���GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864��
GA$3h864��
GA$3p864����GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864��
GA$3h864��
GA$3p864���GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864��
GA$3h864��
GA$3p864����GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864��
GA$3h864��
GA$3p864��G�GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864��
GA$3h864��
GA$3p864P�x�GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864��
GA$3h864��
GA$3p864���GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864��
GA$3h864��
GA$3p864���GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864��
GA$3h864��
GA$3p864����GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864��
GA$3h864��
GA$3p864�s�GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864��
GA$3h864��
GA$3p864s�s�GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864��
GA$3h864��
GA$3p864����GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864��
GA$3h864��
GA$3p864����GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864��
GA$3h864��
GA$3p864��z�GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864��
GA$3h864��
GA$3p864����GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864��
GA$3h864��
GA$3p864����GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864��
GA$3h864��
GA$3p864����GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864��
GA$3h864��GA$3a1����GA$3a1����GA$3a1��GA$3a1����libpcre.so.0.0.1-8.02-9.1.el8.x86_64.debug��_v�7zXZ�ִF!t/���m]?�E�h=��ڊ�2N�	CxW��kV�+�Lse+F�c���}��0��\ҹ���LF"��R~�'�/K'S�%���%���N@���R�&
�/��|w��d���@%��j�2NC���q���XFu^v���Y�w�l�=z��n� !���K���U:���c��.�)w�W��L?5��Ġs��iZ�<�}�R��tϱO���ʝ��X� �&?4�Xu�U�];f�D(0	��^=T���9Al�@o�����6�|Ue	��aC�ы�l�,I�o�If�zn�6n�:e.�~�8�&g�c0�v(��=�}�k����_��1�`֧��,QF����(G�`����`&K�׏(R*�o̫vެ�k�V���M���C1:����8�_�Ԍ&�9�����o�Ey�+���k�Eu~�'x��2K{��e4�?%MO>�<�7�\	`�I=EBP1Ʃ����o�d$\̹-I\y����K-E���m��#��i�g���֨�����ǽ?B���i�Wm��Pcpr�󷃲S�6�Tf~	�G~�k�E�J�����j�łD��%OE܎�'�muZ�Y����@0\c�J!x�_)Cg�F�Z�k@��7�
֡1��3<C�TW=h�K�8���W!A�s C���]��{��$��'֗XC�g�J+��"�v�~nZ�Z{�������+�z��g��=�i�574m��l:�\�UO�B�ܹT�,����M���7 J��]8�7�*""�uÂnthČ��ud�T����~�j]@n�����gΔ�������E*���B�:�V��m�_٭�+#[��Mu]�\>���(��u1�
����J������P�4h]mJ��q�K��B
�A���I���t�'1�ڣS
�9��ꨖ��P��"t�L
"����ty�
����7�܅m�ڼ2�k�G�L[c�,y�Qbf�c�s~��F����q{_A�?4��>3�,% �
gR�b.������_�1�P4S��B������������"�+$fEoc�?˩�9Z������Q��΀�5^������*�P�MT��Y�e�o�e^�%D.��c���P��C�Z�8���)��(�ي�X
�,-��*܊�t�6&��F���v�(�{������_�ets�!�k")�w��boX�`���<��ѝ���Z���=���(��b奃n�:�o���x���
�����^��-��,���x�_PO+1�sr����{�v{�4I�������i�f�շ3�]�,�����TR3��s%[&�q�=��dP�hB�X�5����l.0�}���+78�1��g�YZ.shstrtab.note.gnu.build-id.gnu.hash.dynsym.dynstr.gnu.version.gnu.version_r.rela.dyn.rela.plt.init.plt.sec.text.fini.rodata.eh_frame_hdr.eh_frame.note.gnu.property.init_array.fini_array.data.rel.ro.dynamic.got.data.bss.gnu.build.attributes.gnu_debuglink.gnu_debugdata$���o((�(��`00
0
O8���o���E���o`Thh�^Bphxxc���nPP�w���}����
���  � � ���������� �h�"h��p�"p��x�"x����"����p�"p���# � # �0c �!
"0@"��'(usr/lib64/libpcreposix.a000064400000020126150403561430011144 0ustar00!<arch>
/               1575493209  0     0     0       54        `
zzzzregerrorregfreeregcompregexecpcreposix.o/    1575493209  1667  135   100644  8096      `
ELF>�@@ 
��AVI��AUATI��UH��S��~Hc�H�L�,�L���H�XM��tGI�T$���t=H��H��tH�XH9�v[H�U�L��L���A�D.�H��[]A\A]A^�f.�H��u�H��[]A\A]A^����L�-�DH��M��L���RL�
H�����1�H�
�XH��Z[]A\A]A^�fD��H�?�%��SH��H���փ�H�� dH�%(H�D$1���H�L$L�D$����E����� E�����@E�����H�T$E�E1��HcT$H�H�SH��t01�1�H���H�H�C1�H�L$dH3%(u'H�� [�fDHcT$���Aw�H���������AWAVI�ֺ�AUATUH��SD��H��L�dH�%(H��$�1�E�bH�G����A������Eډڀ�A��Eډڀ�A��E�H�����u	M����1�E1�E1�E1�A����H���T$L�T$H�t$��T$L�T$H�t$��RA��H��E1�AW1�L���ZY����E��uaM���qL��1�@A�߉T�A�T��T�H��H9�r�E���-I9�v'H�D�J�T�fD�@����H���@�����H9�u�1��f�HcE�MH�)��`����I��
��I�����
��K�<vD�D$H��H�t$L�T$�I��H����C�vA�L�T$H�t$D�D$������E��u]�P���
w��H���H��$�dH3%(uhH�Ĩ[]A\A]A^A_��C�vE1�L�|$ �e����L���D$��D$�fDL��������덐1�����E������H��v���
GA$3p864GA$gcc 8.3.1 20190507
GA*GOW�DGA*GA!stack_clashGA*cf_protectionGA+GLIBCXX_ASSERTIONS
GA*FORTIFYGA*GA!GA*GA!stack_realign
GA$3h864
GA$3h864unknown error code at offset %s%s%-6dinternal errorinvalid repeat counts in {}pattern error? * + invalidunbalanced {}unbalanced []bad classbad escape sequenceempty expressionunbalanced ()bad range inside []expression too bigfailed to get memorybad back referencebad argumentmatch failed			

	collation error - not relevantGCC: (GNU) 8.3.1 20190507 (Red Hat 8.3.1-4) GNU��GNU�zRx�h�F�E�B �D(�D0�Y
(A BBBKI
(A BBBH\8L@]8D0A(A BBB�
 ��E�O0�
AG`�|F�B�J �B(�A0�D8�J���K�K�A�-
8A0A(B BBBHL/He����@�,������	�5<DR�
Zd�lz���|��_.annobin_pcreposix.c.annobin_pcreposix.c_end.annobin_pcreposix.c.hot.annobin_pcreposix.c_end.hot.annobin_pcreposix.c.unlikely.annobin_pcreposix.c_end.unlikelypstringeintCSWTCH.8.LC0.LC1.LC2.text.hot.group.text.unlikely.group.text.unlikely..group.text.hot..groupregerror_GLOBAL_OFFSET_TABLE_strlenstrncpy__sprintf_chkregfreepcre_freeregcomppcre_compile2pcre_info__stack_chk_failregexecpcre_execmalloc ��������,)��������`*������������������������������������+���������-��������c/���������0���������<�1��������})���������3��������g4�����������������5��������5��������81�������� L  
(
)
8
T 
b(
p0
~8@
�H
�P
�X
�`
�h
�p
�x
�
�
% �����.symtab.strtab.shstrtab.group.rela.text.data.bss.rela.gnu.build.attributes.text.hot.rela.gnu.build.attributes.hot.text.unlikely.rela.gnu.build.attributes.unlikely.rodata.str1.1.rodata.rodata.str1.8.rela.data.rel.ro.local.comment.note.gnu.property.note.GNU-stack.rela.eh_frame@P`h'pL"@�-�3�=�T8@�0	Sb(]@0|8�8(�@80�2`2��H �2�� 	� �@h��0�	-|�	S�	��	P	0
0
 @`P'	`�x(usr/share/man/man3/pcrestack.3000064400000015606150403561440012145 0ustar00.TH PCRESTACK 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PCRE DISCUSSION OF STACK USAGE"
.rs
.sp
When you call \fBpcre_exec()\fP, it makes use of an internal function called
\fBmatch()\fP. This calls itself recursively at branch points in the pattern,
in order to remember the state of the match so that it can back up and try a
different alternative if the first one fails. As matching proceeds deeper and
deeper into the tree of possibilities, the recursion depth increases.
.P
Not all calls of \fBmatch()\fP increase the recursion depth; for an item such
as a* it may be called several times at the same level, after matching
different numbers of a's. Furthermore, in a number of cases where the result of
the recursive call would immediately be passed back as the result of the
current call (a "tail recursion"), the function is just restarted instead.
.P
The \fBpcre_dfa_exec()\fP function operates in an entirely different way, and
uses recursion only when there is a regular expression recursion or subroutine
call in the pattern. This includes the processing of assertion and "once-only"
subpatterns, which are handled like subroutine calls. Normally, these are never
very deep, and the limit on the complexity of \fBpcre_dfa_exec()\fP is
controlled by the amount of workspace it is given. However, it is possible to
write patterns with runaway infinite recursions; such patterns will cause
\fBpcre_dfa_exec()\fP to run out of stack. At present, there is no protection
against this.
.P
The comments that follow do NOT apply to \fBpcre_dfa_exec()\fP; they are
relevant only for \fBpcre_exec()\fP.
.
.
.SS "Reducing \fBpcre_exec()\fP's stack usage"
.rs
.sp
Each time that \fBmatch()\fP is actually called recursively, it uses memory
from the process stack. For certain kinds of pattern and data, very large
amounts of stack may be needed, despite the recognition of "tail recursion".
You can often reduce the amount of recursion, and therefore the amount of stack
used, by modifying the pattern that is being matched. Consider, for example,
this pattern:
.sp
  ([^<]|<(?!inet))+
.sp
It matches from wherever it starts until it encounters "<inet" or the end of
the data, and is the kind of pattern that might be used when processing an XML
file. Each iteration of the outer parentheses matches either one character that
is not "<" or a "<" that is not followed by "inet". However, each time a
parenthesis is processed, a recursion occurs, so this formulation uses a stack
frame for each matched character. For a long string, a lot of stack is
required. Consider now this rewritten pattern, which matches exactly the same
strings:
.sp
  ([^<]++|<(?!inet))+
.sp
This uses very much less stack, because runs of characters that do not contain
"<" are "swallowed" in one item inside the parentheses. Recursion happens only
when a "<" character that is not followed by "inet" is encountered (and we
assume this is relatively rare). A possessive quantifier is used to stop any
backtracking into the runs of non-"<" characters, but that is not related to
stack usage.
.P
This example shows that one way of avoiding stack problems when matching long
subject strings is to write repeated parenthesized subpatterns to match more
than one character whenever possible.
.
.
.SS "Compiling PCRE to use heap instead of stack for \fBpcre_exec()\fP"
.rs
.sp
In environments where stack memory is constrained, you might want to compile
PCRE to use heap memory instead of stack for remembering back-up points when
\fBpcre_exec()\fP is running. This makes it run a lot more slowly, however.
Details of how to do this are given in the
.\" HREF
\fBpcrebuild\fP
.\"
documentation. When built in this way, instead of using the stack, PCRE obtains
and frees memory by calling the functions that are pointed to by the
\fBpcre_stack_malloc\fP and \fBpcre_stack_free\fP variables. By default, these
point to \fBmalloc()\fP and \fBfree()\fP, but you can replace the pointers to
cause PCRE to use your own functions. Since the block sizes are always the
same, and are always freed in reverse order, it may be possible to implement
customized memory handlers that are more efficient than the standard functions.
.
.
.SS "Limiting \fBpcre_exec()\fP's stack usage"
.rs
.sp
You can set limits on the number of times that \fBmatch()\fP is called, both in
total and recursively. If a limit is exceeded, \fBpcre_exec()\fP returns an
error code. Setting suitable limits should prevent it from running out of
stack. The default values of the limits are very large, and unlikely ever to
operate. They can be changed when PCRE is built, and they can also be set when
\fBpcre_exec()\fP is called. For details of these interfaces, see the
.\" HREF
\fBpcrebuild\fP
.\"
documentation and the
.\" HTML <a href="pcreapi.html#extradata">
.\" </a>
section on extra data for \fBpcre_exec()\fP
.\"
in the
.\" HREF
\fBpcreapi\fP
.\"
documentation.
.P
As a very rough rule of thumb, you should reckon on about 500 bytes per
recursion. Thus, if you want to limit your stack usage to 8Mb, you
should set the limit at 16000 recursions. A 64Mb stack, on the other hand, can
support around 128000 recursions.
.P
In Unix-like environments, the \fBpcretest\fP test program has a command line
option (\fB-S\fP) that can be used to increase the size of its stack. As long
as the stack is large enough, another option (\fB-M\fP) can be used to find the
smallest limits that allow a particular pattern to match a given subject
string. This is done by calling \fBpcre_exec()\fP repeatedly with different
limits.
.
.
.SS "Changing stack size in Unix-like systems"
.rs
.sp
In Unix-like environments, there is not often a problem with the stack unless
very long strings are involved, though the default limit on stack size varies
from system to system. Values from 8Mb to 64Mb are common. You can find your
default limit by running the command:
.sp
  ulimit -s
.sp
Unfortunately, the effect of running out of stack is often SIGSEGV, though
sometimes a more explicit error message is given. You can normally increase the
limit on stack size by code such as this:
.sp
  struct rlimit rlim;
  getrlimit(RLIMIT_STACK, &rlim);
  rlim.rlim_cur = 100*1024*1024;
  setrlimit(RLIMIT_STACK, &rlim);
.sp
This reads the current limits (soft and hard) using \fBgetrlimit()\fP, then
attempts to increase the soft limit to 100Mb using \fBsetrlimit()\fP. You must
do this before calling \fBpcre_exec()\fP.
.
.
.SS "Changing stack size in Mac OS X"
.rs
.sp
Using \fBsetrlimit()\fP, as described above, should also work on Mac OS X. It
is also possible to set a stack size when linking a program. There is a
discussion about stack sizes in Mac OS X at this web site:
.\" HTML <a href="http://developer.apple.com/qa/qa2005/qa1419.html">
.\" </a>
http://developer.apple.com/qa/qa2005/qa1419.html.
.\"
.
.
.SH AUTHOR
.rs
.sp
.nf
Philip Hazel
University Computing Service
Cambridge CB2 3QH, England.
.fi
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 03 January 2010
Copyright (c) 1997-2010 University of Cambridge.
.fi
usr/share/man/man3/pcre_get_stringtable_entries.3000064400000002170150403561440016075 0ustar00.TH PCRE_GET_STRINGTABLE_ENTRIES 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
.ti +5n
.B const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);
.
.SH DESCRIPTION
.rs
.sp
This convenience function finds, for a compiled pattern, the first and last
entries for a given name in the table that translates capturing parenthesis
names into numbers. When names are required to be unique (PCRE_DUPNAMES is
\fInot\fP set), it is usually easier to use \fBpcre_get_stringnumber()\fP
instead.
.sp
  \fIcode\fP    Compiled regular expression
  \fIname\fP    Name whose entries are required
  \fIfirst\fP   Where to return a pointer to the first entry
  \fIlast\fP    Where to return a pointer to the last entry
.sp
The yield of the function is the length of each entry, or
PCRE_ERROR_NOSUBSTRING if none are found.
.P
There is a complete description of the PCRE native API, including the format of
the table entries, in the
.\" HREF
\fBpcreapi\fP
.\"
page, and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fP
.\"
page.
usr/share/man/man3/pcre_refcount.3000064400000001353150403561440013016 0ustar00.TH PCRE_REFCOUNT 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B int pcre_refcount(pcre *\fIcode\fP, int \fIadjust\fP);
.
.SH DESCRIPTION
.rs
.sp
This function is used to maintain a reference count inside a data block that
contains a compiled pattern. Its arguments are:
.sp
  \fIcode\fP                      Compiled regular expression
  \fIadjust\fP                    Adjustment to reference value
.sp
The yield of the function is the adjusted reference value, which is constrained
to lie between 0 and 65535.
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fP
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fP
.\"
page.
usr/share/man/man3/pcre_maketables.3000064400000001323150403561440013276 0ustar00.TH PCRE_MAKETABLES 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B const unsigned char *pcre_maketables(void);
.
.SH DESCRIPTION
.rs
.sp
This function builds a set of character tables for character values less than
256. These can be passed to \fBpcre_compile()\fP to override PCRE's internal,
built-in tables (which were made by \fBpcre_maketables()\fP when PCRE was
compiled). You might want to do this if you are using a non-standard locale.
The function yields a pointer to the tables.
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fP
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fP
.\"
page.
usr/share/man/man3/pcre_compile.3000064400000005645150403561440012631 0ustar00.TH PCRE_COMPILE 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.
.SH DESCRIPTION
.rs
.sp
This function compiles a regular expression into an internal form. It is the
same as \fBpcre_compile2()\fP, except for the absence of the \fIerrorcodeptr\fP
argument. Its arguments are:
.sp
  \fIpattern\fR       A zero-terminated string containing the
                  regular expression to be compiled
  \fIoptions\fR       Zero or more option bits
  \fIerrptr\fR        Where to put an error message
  \fIerroffset\fR     Offset in pattern where error was found
  \fItableptr\fR      Pointer to character tables, or NULL to
                  use the built-in default
.sp
The option bits are:
.sp
  PCRE_ANCHORED           Force pattern anchoring
  PCRE_AUTO_CALLOUT       Compile automatic callouts
  PCRE_BSR_ANYCRLF        \eR matches only CR, LF, or CRLF
  PCRE_BSR_UNICODE        \eR matches all Unicode line endings
  PCRE_CASELESS           Do caseless matching
  PCRE_DOLLAR_ENDONLY     $ not to match newline at end
  PCRE_DOTALL             . matches anything including NL
  PCRE_DUPNAMES           Allow duplicate names for subpatterns
  PCRE_EXTENDED           Ignore whitespace and # comments
  PCRE_EXTRA              PCRE extra features
                            (not much use currently)
  PCRE_FIRSTLINE          Force matching to be before newline
  PCRE_JAVASCRIPT_COMPAT  JavaScript compatibility
  PCRE_MULTILINE          ^ and $ match newlines within data
  PCRE_NEWLINE_ANY        Recognize any Unicode newline sequence
  PCRE_NEWLINE_ANYCRLF    Recognize CR, LF, and CRLF as newline
                            sequences
  PCRE_NEWLINE_CR         Set CR as the newline sequence
  PCRE_NEWLINE_CRLF       Set CRLF as the newline sequence
  PCRE_NEWLINE_LF         Set LF as the newline sequence
  PCRE_NO_AUTO_CAPTURE    Disable numbered capturing paren-
                            theses (named ones available)
  PCRE_NO_UTF8_CHECK      Do not check the pattern for UTF-8
                            validity (only relevant if
                            PCRE_UTF8 is set)
  PCRE_UNGREEDY           Invert greediness of quantifiers
  PCRE_UTF8               Run in UTF-8 mode
.sp
PCRE must be built with UTF-8 support in order to use PCRE_UTF8 and
PCRE_NO_UTF8_CHECK.
.P
The yield of the function is a pointer to a private data structure that
contains the compiled pattern, or NULL if an error was detected. Note that
compiling regular expressions with one version of PCRE for use with a different
version is not guaranteed to work and may cause crashes.
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fR
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fR
.\"
page.
usr/share/man/man3/pcre_copy_named_substring.3000064400000002327150403561440015411 0ustar00.TH PCRE_COPY_NAMED_SUBSTRING 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B int pcre_copy_named_substring(const pcre *\fIcode\fP,
.ti +5n
.B const char *\fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, const char *\fIstringname\fP,
.ti +5n
.B char *\fIbuffer\fP, int \fIbuffersize\fP);
.
.SH DESCRIPTION
.rs
.sp
This is a convenience function for extracting a captured substring, identified
by name, into a given buffer. The arguments are:
.sp
  \fIcode\fP          Pattern that was successfully matched
  \fIsubject\fP       Subject that has been successfully matched
  \fIovector\fP       Offset vector that \fBpcre_exec()\fP used
  \fIstringcount\fP   Value returned by \fBpcre_exec()\fP
  \fIstringname\fP    Name of the required substring
  \fIbuffer\fP        Buffer to receive the string
  \fIbuffersize\fP    Size of buffer
.sp
The yield is the length of the substring, PCRE_ERROR_NOMEMORY if the buffer was
too small, or PCRE_ERROR_NOSUBSTRING if the string name is invalid.
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fP
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fP
.\"
page.
usr/share/man/man3/pcre_study.3000064400000002154150403561440012341 0ustar00.TH PCRE_STUDY 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP);
.
.SH DESCRIPTION
.rs
.sp
This function studies a compiled pattern, to see if additional information can
be extracted that might speed up matching. Its arguments are:
.sp
  \fIcode\fP       A compiled regular expression
  \fIoptions\fP    Options for \fBpcre_study()\fP
  \fIerrptr\fP     Where to put an error message
.sp
If the function succeeds, it returns a value that can be passed to
\fBpcre_exec()\fP via its \fIextra\fP argument.
.P
If the function returns NULL, either it could not find any additional
information, or there was an error. You can tell the difference by looking at
the error value. It is NULL in the first case.
.P
There are currently no options defined; the value of the second argument should
always be zero.
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fP
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fP
.\"
page.
usr/share/man/man3/pcre_get_substring.3000064400000002361150403561440014050 0ustar00.TH PCRE_GET_SUBSTRING 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP,
.ti +5n
.B const char **\fIstringptr\fP);
.
.SH DESCRIPTION
.rs
.sp
This is a convenience function for extracting a captured substring. The
arguments are:
.sp
  \fIsubject\fP       Subject that has been successfully matched
  \fIovector\fP       Offset vector that \fBpcre_exec()\fP used
  \fIstringcount\fP   Value returned by \fBpcre_exec()\fP
  \fIstringnumber\fP  Number of the required substring
  \fIstringptr\fP     Where to put the string pointer
.sp
The memory in which the substring is placed is obtained by calling
\fBpcre_malloc()\fP. The convenience function \fBpcre_free_substring()\fP can
be used to free it when it is no longer needed. The yield of the function is
the length of the substring, PCRE_ERROR_NOMEMORY if sufficient memory could not
be obtained, or PCRE_ERROR_NOSUBSTRING if the string number is invalid.
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fP
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fP
.\"
page.
usr/share/man/man3/pcrepattern.3000064400000303252150403561440012512 0ustar00.TH PCREPATTERN 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PCRE REGULAR EXPRESSION DETAILS"
.rs
.sp
The syntax and semantics of the regular expressions that are supported by PCRE
are described in detail below. There is a quick-reference syntax summary in the
.\" HREF
\fBpcresyntax\fP
.\"
page. PCRE tries to match Perl syntax and semantics as closely as it can. PCRE
also supports some alternative regular expression syntax (which does not
conflict with the Perl syntax) in order to provide some compatibility with
regular expressions in Python, .NET, and Oniguruma.
.P
Perl's regular expressions are described in its own documentation, and
regular expressions in general are covered in a number of books, some of which
have copious examples. Jeffrey Friedl's "Mastering Regular Expressions",
published by O'Reilly, covers regular expressions in great detail. This
description of PCRE's regular expressions is intended as reference material.
.P
The original operation of PCRE was on strings of one-byte characters. However,
there is now also support for UTF-8 character strings. To use this,
PCRE must be built to include UTF-8 support, and you must call
\fBpcre_compile()\fP or \fBpcre_compile2()\fP with the PCRE_UTF8 option. There
is also a special sequence that can be given at the start of a pattern:
.sp
  (*UTF8)
.sp
Starting a pattern with this sequence is equivalent to setting the PCRE_UTF8
option. This feature is not Perl-compatible. How setting UTF-8 mode affects
pattern matching is mentioned in several places below. There is also a summary
of UTF-8 features in the
.\" HTML <a href="pcre.html#utf8support">
.\" </a>
section on UTF-8 support
.\"
in the main
.\" HREF
\fBpcre\fP
.\"
page.
.P
The remainder of this document discusses the patterns that are supported by
PCRE when its main matching function, \fBpcre_exec()\fP, is used.
From release 6.0, PCRE offers a second matching function,
\fBpcre_dfa_exec()\fP, which matches using a different algorithm that is not
Perl-compatible. Some of the features discussed below are not available when
\fBpcre_dfa_exec()\fP is used. The advantages and disadvantages of the
alternative function, and how it differs from the normal function, are
discussed in the
.\" HREF
\fBpcrematching\fP
.\"
page.
.
.
.SH "NEWLINE CONVENTIONS"
.rs
.sp
PCRE supports five different conventions for indicating line breaks in
strings: a single CR (carriage return) character, a single LF (linefeed)
character, the two-character sequence CRLF, any of the three preceding, or any
Unicode newline sequence. The
.\" HREF
\fBpcreapi\fP
.\"
page has
.\" HTML <a href="pcreapi.html#newlines">
.\" </a>
further discussion
.\"
about newlines, and shows how to set the newline convention in the
\fIoptions\fP arguments for the compiling and matching functions.
.P
It is also possible to specify a newline convention by starting a pattern
string with one of the following five sequences:
.sp
  (*CR)        carriage return
  (*LF)        linefeed
  (*CRLF)      carriage return, followed by linefeed
  (*ANYCRLF)   any of the three above
  (*ANY)       all Unicode newline sequences
.sp
These override the default and the options given to \fBpcre_compile()\fP or
\fBpcre_compile2()\fP. For example, on a Unix system where LF is the default
newline sequence, the pattern
.sp
  (*CR)a.b
.sp
changes the convention to CR. That pattern matches "a\enb" because LF is no
longer a newline. Note that these special settings, which are not
Perl-compatible, are recognized only at the very start of a pattern, and that
they must be in upper case. If more than one of them is present, the last one
is used.
.P
The newline convention does not affect what the \eR escape sequence matches. By
default, this is any Unicode newline sequence, for Perl compatibility. However,
this can be changed; see the description of \eR in the section entitled
.\" HTML <a href="#newlineseq">
.\" </a>
"Newline sequences"
.\"
below. A change of \eR setting can be combined with a change of newline
convention.
.
.
.SH "CHARACTERS AND METACHARACTERS"
.rs
.sp
A regular expression is a pattern that is matched against a subject string from
left to right. Most characters stand for themselves in a pattern, and match the
corresponding characters in the subject. As a trivial example, the pattern
.sp
  The quick brown fox
.sp
matches a portion of a subject string that is identical to itself. When
caseless matching is specified (the PCRE_CASELESS option), letters are matched
independently of case. In UTF-8 mode, PCRE always understands the concept of
case for characters whose values are less than 128, so caseless matching is
always possible. For characters with higher values, the concept of case is
supported if PCRE is compiled with Unicode property support, but not otherwise.
If you want to use caseless matching for characters 128 and above, you must
ensure that PCRE is compiled with Unicode property support as well as with
UTF-8 support.
.P
The power of regular expressions comes from the ability to include alternatives
and repetitions in the pattern. These are encoded in the pattern by the use of
\fImetacharacters\fP, which do not stand for themselves but instead are
interpreted in some special way.
.P
There are two different sets of metacharacters: those that are recognized
anywhere in the pattern except within square brackets, and those that are
recognized within square brackets. Outside square brackets, the metacharacters
are as follows:
.sp
  \e      general escape character with several uses
  ^      assert start of string (or line, in multiline mode)
  $      assert end of string (or line, in multiline mode)
  .      match any character except newline (by default)
  [      start character class definition
  |      start of alternative branch
  (      start subpattern
  )      end subpattern
  ?      extends the meaning of (
         also 0 or 1 quantifier
         also quantifier minimizer
  *      0 or more quantifier
  +      1 or more quantifier
         also "possessive quantifier"
  {      start min/max quantifier
.sp
Part of a pattern that is in square brackets is called a "character class". In
a character class the only metacharacters are:
.sp
  \e      general escape character
  ^      negate the class, but only if the first character
  -      indicates character range
.\" JOIN
  [      POSIX character class (only if followed by POSIX
           syntax)
  ]      terminates the character class
.sp
The following sections describe the use of each of the metacharacters.
.
.
.SH BACKSLASH
.rs
.sp
The backslash character has several uses. Firstly, if it is followed by a
non-alphanumeric character, it takes away any special meaning that character
may have. This use of backslash as an escape character applies both inside and
outside character classes.
.P
For example, if you want to match a * character, you write \e* in the pattern.
This escaping action applies whether or not the following character would
otherwise be interpreted as a metacharacter, so it is always safe to precede a
non-alphanumeric with backslash to specify that it stands for itself. In
particular, if you want to match a backslash, you write \e\e.
.P
If a pattern is compiled with the PCRE_EXTENDED option, whitespace in the
pattern (other than in a character class) and characters between a # outside
a character class and the next newline are ignored. An escaping backslash can
be used to include a whitespace or # character as part of the pattern.
.P
If you want to remove the special meaning from a sequence of characters, you
can do so by putting them between \eQ and \eE. This is different from Perl in
that $ and @ are handled as literals in \eQ...\eE sequences in PCRE, whereas in
Perl, $ and @ cause variable interpolation. Note the following examples:
.sp
  Pattern            PCRE matches   Perl matches
.sp
.\" JOIN
  \eQabc$xyz\eE        abc$xyz        abc followed by the
                                      contents of $xyz
  \eQabc\e$xyz\eE       abc\e$xyz       abc\e$xyz
  \eQabc\eE\e$\eQxyz\eE   abc$xyz        abc$xyz
.sp
The \eQ...\eE sequence is recognized both inside and outside character classes.
.
.
.\" HTML <a name="digitsafterbackslash"></a>
.SS "Non-printing characters"
.rs
.sp
A second use of backslash provides a way of encoding non-printing characters
in patterns in a visible manner. There is no restriction on the appearance of
non-printing characters, apart from the binary zero that terminates a pattern,
but when a pattern is being prepared by text editing, it is often easier to use
one of the following escape sequences than the binary character it represents:
.sp
  \ea        alarm, that is, the BEL character (hex 07)
  \ecx       "control-x", where x is any character
  \ee        escape (hex 1B)
  \ef        formfeed (hex 0C)
  \en        linefeed (hex 0A)
  \er        carriage return (hex 0D)
  \et        tab (hex 09)
  \eddd      character with octal code ddd, or back reference
  \exhh      character with hex code hh
  \ex{hhh..} character with hex code hhh..
.sp
The precise effect of \ecx is as follows: if x is a lower case letter, it
is converted to upper case. Then bit 6 of the character (hex 40) is inverted.
Thus \ecz becomes hex 1A, but \ec{ becomes hex 3B, while \ec; becomes hex
7B.
.P
After \ex, from zero to two hexadecimal digits are read (letters can be in
upper or lower case). Any number of hexadecimal digits may appear between \ex{
and }, but the value of the character code must be less than 256 in non-UTF-8
mode, and less than 2**31 in UTF-8 mode. That is, the maximum value in
hexadecimal is 7FFFFFFF. Note that this is bigger than the largest Unicode code
point, which is 10FFFF.
.P
If characters other than hexadecimal digits appear between \ex{ and }, or if
there is no terminating }, this form of escape is not recognized. Instead, the
initial \ex will be interpreted as a basic hexadecimal escape, with no
following digits, giving a character whose value is zero.
.P
Characters whose value is less than 256 can be defined by either of the two
syntaxes for \ex. There is no difference in the way they are handled. For
example, \exdc is exactly the same as \ex{dc}.
.P
After \e0 up to two further octal digits are read. If there are fewer than two
digits, just those that are present are used. Thus the sequence \e0\ex\e07
specifies two binary zeros followed by a BEL character (code value 7). Make
sure you supply two digits after the initial zero if the pattern character that
follows is itself an octal digit.
.P
The handling of a backslash followed by a digit other than 0 is complicated.
Outside a character class, PCRE reads it and any following digits as a decimal
number. If the number is less than 10, or if there have been at least that many
previous capturing left parentheses in the expression, the entire sequence is
taken as a \fIback reference\fP. A description of how this works is given
.\" HTML <a href="#backreferences">
.\" </a>
later,
.\"
following the discussion of
.\" HTML <a href="#subpattern">
.\" </a>
parenthesized subpatterns.
.\"
.P
Inside a character class, or if the decimal number is greater than 9 and there
have not been that many capturing subpatterns, PCRE re-reads up to three octal
digits following the backslash, and uses them to generate a data character. Any
subsequent digits stand for themselves. In non-UTF-8 mode, the value of a
character specified in octal must be less than \e400. In UTF-8 mode, values up
to \e777 are permitted. For example:
.sp
  \e040   is another way of writing a space
.\" JOIN
  \e40    is the same, provided there are fewer than 40
            previous capturing subpatterns
  \e7     is always a back reference
.\" JOIN
  \e11    might be a back reference, or another way of
            writing a tab
  \e011   is always a tab
  \e0113  is a tab followed by the character "3"
.\" JOIN
  \e113   might be a back reference, otherwise the
            character with octal code 113
.\" JOIN
  \e377   might be a back reference, otherwise
            the byte consisting entirely of 1 bits
.\" JOIN
  \e81    is either a back reference, or a binary zero
            followed by the two characters "8" and "1"
.sp
Note that octal values of 100 or greater must not be introduced by a leading
zero, because no more than three octal digits are ever read.
.P
All the sequences that define a single character value can be used both inside
and outside character classes. In addition, inside a character class, the
sequence \eb is interpreted as the backspace character (hex 08), and the
sequences \eR and \eX are interpreted as the characters "R" and "X",
respectively. Outside a character class, these sequences have different
meanings
.\" HTML <a href="#uniextseq">
.\" </a>
(see below).
.\"
.
.
.SS "Absolute and relative back references"
.rs
.sp
The sequence \eg followed by an unsigned or a negative number, optionally
enclosed in braces, is an absolute or relative back reference. A named back
reference can be coded as \eg{name}. Back references are discussed
.\" HTML <a href="#backreferences">
.\" </a>
later,
.\"
following the discussion of
.\" HTML <a href="#subpattern">
.\" </a>
parenthesized subpatterns.
.\"
.
.
.SS "Absolute and relative subroutine calls"
.rs
.sp
For compatibility with Oniguruma, the non-Perl syntax \eg followed by a name or
a number enclosed either in angle brackets or single quotes, is an alternative
syntax for referencing a subpattern as a "subroutine". Details are discussed
.\" HTML <a href="#onigurumasubroutines">
.\" </a>
later.
.\"
Note that \eg{...} (Perl syntax) and \eg<...> (Oniguruma syntax) are \fInot\fP
synonymous. The former is a back reference; the latter is a
.\" HTML <a href="#subpatternsassubroutines">
.\" </a>
subroutine
.\"
call.
.
.
.SS "Generic character types"
.rs
.sp
Another use of backslash is for specifying generic character types. The
following are always recognized:
.sp
  \ed     any decimal digit
  \eD     any character that is not a decimal digit
  \eh     any horizontal whitespace character
  \eH     any character that is not a horizontal whitespace character
  \es     any whitespace character
  \eS     any character that is not a whitespace character
  \ev     any vertical whitespace character
  \eV     any character that is not a vertical whitespace character
  \ew     any "word" character
  \eW     any "non-word" character
.sp
Each pair of escape sequences partitions the complete set of characters into
two disjoint sets. Any given character matches one, and only one, of each pair.
.P
These character type sequences can appear both inside and outside character
classes. They each match one character of the appropriate type. If the current
matching point is at the end of the subject string, all of them fail, since
there is no character to match.
.P
For compatibility with Perl, \es does not match the VT character (code 11).
This makes it different from the POSIX "space" class. The \es characters
are HT (9), LF (10), FF (12), CR (13), and space (32). If "use locale;" is
included in a Perl script, \es may match the VT character. In PCRE, it never
does.
.P
In UTF-8 mode, characters with values greater than 128 never match \ed, \es, or
\ew, and always match \eD, \eS, and \eW. This is true even when Unicode
character property support is available. These sequences retain their original
meanings from before UTF-8 support was available, mainly for efficiency
reasons. Note that this also affects \eb, because it is defined in terms of \ew
and \eW.
.P
The sequences \eh, \eH, \ev, and \eV are Perl 5.10 features. In contrast to the
other sequences, these do match certain high-valued codepoints in UTF-8 mode.
The horizontal space characters are:
.sp
  U+0009     Horizontal tab
  U+0020     Space
  U+00A0     Non-break space
  U+1680     Ogham space mark
  U+180E     Mongolian vowel separator
  U+2000     En quad
  U+2001     Em quad
  U+2002     En space
  U+2003     Em space
  U+2004     Three-per-em space
  U+2005     Four-per-em space
  U+2006     Six-per-em space
  U+2007     Figure space
  U+2008     Punctuation space
  U+2009     Thin space
  U+200A     Hair space
  U+202F     Narrow no-break space
  U+205F     Medium mathematical space
  U+3000     Ideographic space
.sp
The vertical space characters are:
.sp
  U+000A     Linefeed
  U+000B     Vertical tab
  U+000C     Formfeed
  U+000D     Carriage return
  U+0085     Next line
  U+2028     Line separator
  U+2029     Paragraph separator
.P
A "word" character is an underscore or any character less than 256 that is a
letter or digit. The definition of letters and digits is controlled by PCRE's
low-valued character tables, and may vary if locale-specific matching is taking
place (see
.\" HTML <a href="pcreapi.html#localesupport">
.\" </a>
"Locale support"
.\"
in the
.\" HREF
\fBpcreapi\fP
.\"
page). For example, in a French locale such as "fr_FR" in Unix-like systems,
or "french" in Windows, some character codes greater than 128 are used for
accented letters, and these are matched by \ew. The use of locales with Unicode
is discouraged.
.
.
.\" HTML <a name="newlineseq"></a>
.SS "Newline sequences"
.rs
.sp
Outside a character class, by default, the escape sequence \eR matches any
Unicode newline sequence. This is a Perl 5.10 feature. In non-UTF-8 mode \eR is
equivalent to the following:
.sp
  (?>\er\en|\en|\ex0b|\ef|\er|\ex85)
.sp
This is an example of an "atomic group", details of which are given
.\" HTML <a href="#atomicgroup">
.\" </a>
below.
.\"
This particular group matches either the two-character sequence CR followed by
LF, or one of the single characters LF (linefeed, U+000A), VT (vertical tab,
U+000B), FF (formfeed, U+000C), CR (carriage return, U+000D), or NEL (next
line, U+0085). The two-character sequence is treated as a single unit that
cannot be split.
.P
In UTF-8 mode, two additional characters whose codepoints are greater than 255
are added: LS (line separator, U+2028) and PS (paragraph separator, U+2029).
Unicode character property support is not needed for these characters to be
recognized.
.P
It is possible to restrict \eR to match only CR, LF, or CRLF (instead of the
complete set of Unicode line endings) by setting the option PCRE_BSR_ANYCRLF
either at compile time or when the pattern is matched. (BSR is an abbreviation
for "backslash R".) This can be made the default when PCRE is built; if this is
the case, the other behaviour can be requested via the PCRE_BSR_UNICODE option.
It is also possible to specify these settings by starting a pattern string with
one of the following sequences:
.sp
  (*BSR_ANYCRLF)   CR, LF, or CRLF only
  (*BSR_UNICODE)   any Unicode newline sequence
.sp
These override the default and the options given to \fBpcre_compile()\fP or
\fBpcre_compile2()\fP, but they can be overridden by options given to
\fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP. Note that these special settings,
which are not Perl-compatible, are recognized only at the very start of a
pattern, and that they must be in upper case. If more than one of them is
present, the last one is used. They can be combined with a change of newline
convention, for example, a pattern can start with:
.sp
  (*ANY)(*BSR_ANYCRLF)
.sp
Inside a character class, \eR matches the letter "R".
.
.
.\" HTML <a name="uniextseq"></a>
.SS Unicode character properties
.rs
.sp
When PCRE is built with Unicode character property support, three additional
escape sequences that match characters with specific properties are available.
When not in UTF-8 mode, these sequences are of course limited to testing
characters whose codepoints are less than 256, but they do work in this mode.
The extra escape sequences are:
.sp
  \ep{\fIxx\fP}   a character with the \fIxx\fP property
  \eP{\fIxx\fP}   a character without the \fIxx\fP property
  \eX       an extended Unicode sequence
.sp
The property names represented by \fIxx\fP above are limited to the Unicode
script names, the general category properties, and "Any", which matches any
character (including newline). Other properties such as "InMusicalSymbols" are
not currently supported by PCRE. Note that \eP{Any} does not match any
characters, so always causes a match failure.
.P
Sets of Unicode characters are defined as belonging to certain scripts. A
character from one of these sets can be matched using a script name. For
example:
.sp
  \ep{Greek}
  \eP{Han}
.sp
Those that are not part of an identified script are lumped together as
"Common". The current list of scripts is:
.P
Arabic,
Armenian,
Avestan,
Balinese,
Bamum,
Bengali,
Bopomofo,
Braille,
Buginese,
Buhid,
Canadian_Aboriginal,
Carian,
Cham,
Cherokee,
Common,
Coptic,
Cuneiform,
Cypriot,
Cyrillic,
Deseret,
Devanagari,
Egyptian_Hieroglyphs,
Ethiopic,
Georgian,
Glagolitic,
Gothic,
Greek,
Gujarati,
Gurmukhi,
Han,
Hangul,
Hanunoo,
Hebrew,
Hiragana,
Imperial_Aramaic,
Inherited,
Inscriptional_Pahlavi,
Inscriptional_Parthian,
Javanese,
Kaithi,
Kannada,
Katakana,
Kayah_Li,
Kharoshthi,
Khmer,
Lao,
Latin,
Lepcha,
Limbu,
Linear_B,
Lisu,
Lycian,
Lydian,
Malayalam,
Meetei_Mayek,
Mongolian,
Myanmar,
New_Tai_Lue,
Nko,
Ogham,
Old_Italic,
Old_Persian,
Old_South_Arabian,
Old_Turkic,
Ol_Chiki,
Oriya,
Osmanya,
Phags_Pa,
Phoenician,
Rejang,
Runic,
Samaritan,
Saurashtra,
Shavian,
Sinhala,
Sundanese,
Syloti_Nagri,
Syriac,
Tagalog,
Tagbanwa,
Tai_Le,
Tai_Tham,
Tai_Viet,
Tamil,
Telugu,
Thaana,
Thai,
Tibetan,
Tifinagh,
Ugaritic,
Vai,
Yi.
.P
Each character has exactly one general category property, specified by a
two-letter abbreviation. For compatibility with Perl, negation can be specified
by including a circumflex between the opening brace and the property name. For
example, \ep{^Lu} is the same as \eP{Lu}.
.P
If only one letter is specified with \ep or \eP, it includes all the general
category properties that start with that letter. In this case, in the absence
of negation, the curly brackets in the escape sequence are optional; these two
examples have the same effect:
.sp
  \ep{L}
  \epL
.sp
The following general category property codes are supported:
.sp
  C     Other
  Cc    Control
  Cf    Format
  Cn    Unassigned
  Co    Private use
  Cs    Surrogate
.sp
  L     Letter
  Ll    Lower case letter
  Lm    Modifier letter
  Lo    Other letter
  Lt    Title case letter
  Lu    Upper case letter
.sp
  M     Mark
  Mc    Spacing mark
  Me    Enclosing mark
  Mn    Non-spacing mark
.sp
  N     Number
  Nd    Decimal number
  Nl    Letter number
  No    Other number
.sp
  P     Punctuation
  Pc    Connector punctuation
  Pd    Dash punctuation
  Pe    Close punctuation
  Pf    Final punctuation
  Pi    Initial punctuation
  Po    Other punctuation
  Ps    Open punctuation
.sp
  S     Symbol
  Sc    Currency symbol
  Sk    Modifier symbol
  Sm    Mathematical symbol
  So    Other symbol
.sp
  Z     Separator
  Zl    Line separator
  Zp    Paragraph separator
  Zs    Space separator
.sp
The special property L& is also supported: it matches a character that has
the Lu, Ll, or Lt property, in other words, a letter that is not classified as
a modifier or "other".
.P
The Cs (Surrogate) property applies only to characters in the range U+D800 to
U+DFFF. Such characters are not valid in UTF-8 strings (see RFC 3629) and so
cannot be tested by PCRE, unless UTF-8 validity checking has been turned off
(see the discussion of PCRE_NO_UTF8_CHECK in the
.\" HREF
\fBpcreapi\fP
.\"
page). Perl does not support the Cs property.
.P
The long synonyms for property names that Perl supports (such as \ep{Letter})
are not supported by PCRE, nor is it permitted to prefix any of these
properties with "Is".
.P
No character that is in the Unicode table has the Cn (unassigned) property.
Instead, this property is assumed for any code point that is not in the
Unicode table.
.P
Specifying caseless matching does not affect these escape sequences. For
example, \ep{Lu} always matches only upper case letters.
.P
The \eX escape matches any number of Unicode characters that form an extended
Unicode sequence. \eX is equivalent to
.sp
  (?>\ePM\epM*)
.sp
That is, it matches a character without the "mark" property, followed by zero
or more characters with the "mark" property, and treats the sequence as an
atomic group
.\" HTML <a href="#atomicgroup">
.\" </a>
(see below).
.\"
Characters with the "mark" property are typically accents that affect the
preceding character. None of them have codepoints less than 256, so in
non-UTF-8 mode \eX matches any one character.
.P
Matching characters by Unicode property is not fast, because PCRE has to search
a structure that contains data for over fifteen thousand characters. That is
why the traditional escape sequences such as \ed and \ew do not use Unicode
properties in PCRE.
.
.
.\" HTML <a name="resetmatchstart"></a>
.SS "Resetting the match start"
.rs
.sp
The escape sequence \eK, which is a Perl 5.10 feature, causes any previously
matched characters not to be included in the final matched sequence. For
example, the pattern:
.sp
  foo\eKbar
.sp
matches "foobar", but reports that it has matched "bar". This feature is
similar to a lookbehind assertion
.\" HTML <a href="#lookbehind">
.\" </a>
(described below).
.\"
However, in this case, the part of the subject before the real match does not
have to be of fixed length, as lookbehind assertions do. The use of \eK does
not interfere with the setting of
.\" HTML <a href="#subpattern">
.\" </a>
captured substrings.
.\"
For example, when the pattern
.sp
  (foo)\eKbar
.sp
matches "foobar", the first substring is still set to "foo".
.P
Perl documents that the use of \eK within assertions is "not well defined". In
PCRE, \eK is acted upon when it occurs inside positive assertions, but is
ignored in negative assertions.
.
.
.\" HTML <a name="smallassertions"></a>
.SS "Simple assertions"
.rs
.sp
The final use of backslash is for certain simple assertions. An assertion
specifies a condition that has to be met at a particular point in a match,
without consuming any characters from the subject string. The use of
subpatterns for more complicated assertions is described
.\" HTML <a href="#bigassertions">
.\" </a>
below.
.\"
The backslashed assertions are:
.sp
  \eb     matches at a word boundary
  \eB     matches when not at a word boundary
  \eA     matches at the start of the subject
  \eZ     matches at the end of the subject
          also matches before a newline at the end of the subject
  \ez     matches only at the end of the subject
  \eG     matches at the first matching position in the subject
.sp
These assertions may not appear in character classes (but note that \eb has a
different meaning, namely the backspace character, inside a character class).
.P
A word boundary is a position in the subject string where the current character
and the previous character do not both match \ew or \eW (i.e. one matches
\ew and the other matches \eW), or the start or end of the string if the
first or last character matches \ew, respectively. Neither PCRE nor Perl has a
separate "start of word" or "end of word" metasequence. However, whatever
follows \eb normally determines which it is. For example, the fragment
\eba matches "a" at the start of a word.
.P
The \eA, \eZ, and \ez assertions differ from the traditional circumflex and
dollar (described in the next section) in that they only ever match at the very
start and end of the subject string, whatever options are set. Thus, they are
independent of multiline mode. These three assertions are not affected by the
PCRE_NOTBOL or PCRE_NOTEOL options, which affect only the behaviour of the
circumflex and dollar metacharacters. However, if the \fIstartoffset\fP
argument of \fBpcre_exec()\fP is non-zero, indicating that matching is to start
at a point other than the beginning of the subject, \eA can never match. The
difference between \eZ and \ez is that \eZ matches before a newline at the end
of the string as well as at the very end, whereas \ez matches only at the end.
.P
The \eG assertion is true only when the current matching position is at the
start point of the match, as specified by the \fIstartoffset\fP argument of
\fBpcre_exec()\fP. It differs from \eA when the value of \fIstartoffset\fP is
non-zero. By calling \fBpcre_exec()\fP multiple times with appropriate
arguments, you can mimic Perl's /g option, and it is in this kind of
implementation where \eG can be useful.
.P
Note, however, that PCRE's interpretation of \eG, as the start of the current
match, is subtly different from Perl's, which defines it as the end of the
previous match. In Perl, these can be different when the previously matched
string was empty. Because PCRE does just one match at a time, it cannot
reproduce this behaviour.
.P
If all the alternatives of a pattern begin with \eG, the expression is anchored
to the starting match position, and the "anchored" flag is set in the compiled
regular expression.
.
.
.SH "CIRCUMFLEX AND DOLLAR"
.rs
.sp
Outside a character class, in the default matching mode, the circumflex
character is an assertion that is true only if the current matching point is
at the start of the subject string. If the \fIstartoffset\fP argument of
\fBpcre_exec()\fP is non-zero, circumflex can never match if the PCRE_MULTILINE
option is unset. Inside a character class, circumflex has an entirely different
meaning
.\" HTML <a href="#characterclass">
.\" </a>
(see below).
.\"
.P
Circumflex need not be the first character of the pattern if a number of
alternatives are involved, but it should be the first thing in each alternative
in which it appears if the pattern is ever to match that branch. If all
possible alternatives start with a circumflex, that is, if the pattern is
constrained to match only at the start of the subject, it is said to be an
"anchored" pattern. (There are also other constructs that can cause a pattern
to be anchored.)
.P
A dollar character is an assertion that is true only if the current matching
point is at the end of the subject string, or immediately before a newline
at the end of the string (by default). Dollar need not be the last character of
the pattern if a number of alternatives are involved, but it should be the last
item in any branch in which it appears. Dollar has no special meaning in a
character class.
.P
The meaning of dollar can be changed so that it matches only at the very end of
the string, by setting the PCRE_DOLLAR_ENDONLY option at compile time. This
does not affect the \eZ assertion.
.P
The meanings of the circumflex and dollar characters are changed if the
PCRE_MULTILINE option is set. When this is the case, a circumflex matches
immediately after internal newlines as well as at the start of the subject
string. It does not match after a newline that ends the string. A dollar
matches before any newlines in the string, as well as at the very end, when
PCRE_MULTILINE is set. When newline is specified as the two-character
sequence CRLF, isolated CR and LF characters do not indicate newlines.
.P
For example, the pattern /^abc$/ matches the subject string "def\enabc" (where
\en represents a newline) in multiline mode, but not otherwise. Consequently,
patterns that are anchored in single line mode because all branches start with
^ are not anchored in multiline mode, and a match for circumflex is possible
when the \fIstartoffset\fP argument of \fBpcre_exec()\fP is non-zero. The
PCRE_DOLLAR_ENDONLY option is ignored if PCRE_MULTILINE is set.
.P
Note that the sequences \eA, \eZ, and \ez can be used to match the start and
end of the subject in both modes, and if all branches of a pattern start with
\eA it is always anchored, whether or not PCRE_MULTILINE is set.
.
.
.SH "FULL STOP (PERIOD, DOT)"
.rs
.sp
Outside a character class, a dot in the pattern matches any one character in
the subject string except (by default) a character that signifies the end of a
line. In UTF-8 mode, the matched character may be more than one byte long.
.P
When a line ending is defined as a single character, dot never matches that
character; when the two-character sequence CRLF is used, dot does not match CR
if it is immediately followed by LF, but otherwise it matches all characters
(including isolated CRs and LFs). When any Unicode line endings are being
recognized, dot does not match CR or LF or any of the other line ending
characters.
.P
The behaviour of dot with regard to newlines can be changed. If the PCRE_DOTALL
option is set, a dot matches any one character, without exception. If the
two-character sequence CRLF is present in the subject string, it takes two dots
to match it.
.P
The handling of dot is entirely independent of the handling of circumflex and
dollar, the only relationship being that they both involve newlines. Dot has no
special meaning in a character class.
.
.
.SH "MATCHING A SINGLE BYTE"
.rs
.sp
Outside a character class, the escape sequence \eC matches any one byte, both
in and out of UTF-8 mode. Unlike a dot, it always matches any line-ending
characters. The feature is provided in Perl in order to match individual bytes
in UTF-8 mode. Because it breaks up UTF-8 characters into individual bytes,
what remains in the string may be a malformed UTF-8 string. For this reason,
the \eC escape sequence is best avoided.
.P
PCRE does not allow \eC to appear in lookbehind assertions
.\" HTML <a href="#lookbehind">
.\" </a>
(described below),
.\"
because in UTF-8 mode this would make it impossible to calculate the length of
the lookbehind.
.
.
.\" HTML <a name="characterclass"></a>
.SH "SQUARE BRACKETS AND CHARACTER CLASSES"
.rs
.sp
An opening square bracket introduces a character class, terminated by a closing
square bracket. A closing square bracket on its own is not special by default.
However, if the PCRE_JAVASCRIPT_COMPAT option is set, a lone closing square
bracket causes a compile-time error. If a closing square bracket is required as
a member of the class, it should be the first data character in the class
(after an initial circumflex, if present) or escaped with a backslash.
.P
A character class matches a single character in the subject. In UTF-8 mode, the
character may be more than one byte long. A matched character must be in the
set of characters defined by the class, unless the first character in the class
definition is a circumflex, in which case the subject character must not be in
the set defined by the class. If a circumflex is actually required as a member
of the class, ensure it is not the first character, or escape it with a
backslash.
.P
For example, the character class [aeiou] matches any lower case vowel, while
[^aeiou] matches any character that is not a lower case vowel. Note that a
circumflex is just a convenient notation for specifying the characters that
are in the class by enumerating those that are not. A class that starts with a
circumflex is not an assertion; it still consumes a character from the subject
string, and therefore it fails if the current pointer is at the end of the
string.
.P
In UTF-8 mode, characters with values greater than 255 can be included in a
class as a literal string of bytes, or by using the \ex{ escaping mechanism.
.P
When caseless matching is set, any letters in a class represent both their
upper case and lower case versions, so for example, a caseless [aeiou] matches
"A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a
caseful version would. In UTF-8 mode, PCRE always understands the concept of
case for characters whose values are less than 128, so caseless matching is
always possible. For characters with higher values, the concept of case is
supported if PCRE is compiled with Unicode property support, but not otherwise.
If you want to use caseless matching in UTF-8 mode for characters 128 and above,
you must ensure that PCRE is compiled with Unicode property support as well as
with UTF-8 support.
.P
Characters that might indicate line breaks are never treated in any special way
when matching character classes, whatever line-ending sequence is in use, and
whatever setting of the PCRE_DOTALL and PCRE_MULTILINE options is used. A class
such as [^a] always matches one of these characters.
.P
The minus (hyphen) character can be used to specify a range of characters in a
character class. For example, [d-m] matches any letter between d and m,
inclusive. If a minus character is required in a class, it must be escaped with
a backslash or appear in a position where it cannot be interpreted as
indicating a range, typically as the first or last character in the class.
.P
It is not possible to have the literal character "]" as the end character of a
range. A pattern such as [W-]46] is interpreted as a class of two characters
("W" and "-") followed by a literal string "46]", so it would match "W46]" or
"-46]". However, if the "]" is escaped with a backslash it is interpreted as
the end of range, so [W-\e]46] is interpreted as a class containing a range
followed by two other characters. The octal or hexadecimal representation of
"]" can also be used to end a range.
.P
Ranges operate in the collating sequence of character values. They can also be
used for characters specified numerically, for example [\e000-\e037]. In UTF-8
mode, ranges can include characters whose values are greater than 255, for
example [\ex{100}-\ex{2ff}].
.P
If a range that includes letters is used when caseless matching is set, it
matches the letters in either case. For example, [W-c] is equivalent to
[][\e\e^_`wxyzabc], matched caselessly, and in non-UTF-8 mode, if character
tables for a French locale are in use, [\exc8-\excb] matches accented E
characters in both cases. In UTF-8 mode, PCRE supports the concept of case for
characters with values greater than 128 only when it is compiled with Unicode
property support.
.P
The character types \ed, \eD, \ep, \eP, \es, \eS, \ew, and \eW may also appear
in a character class, and add the characters that they match to the class. For
example, [\edABCDEF] matches any hexadecimal digit. A circumflex can
conveniently be used with the upper case character types to specify a more
restricted set of characters than the matching lower case type. For example,
the class [^\eW_] matches any letter or digit, but not underscore.
.P
The only metacharacters that are recognized in character classes are backslash,
hyphen (only where it can be interpreted as specifying a range), circumflex
(only at the start), opening square bracket (only when it can be interpreted as
introducing a POSIX class name - see the next section), and the terminating
closing square bracket. However, escaping other non-alphanumeric characters
does no harm.
.
.
.SH "POSIX CHARACTER CLASSES"
.rs
.sp
Perl supports the POSIX notation for character classes. This uses names
enclosed by [: and :] within the enclosing square brackets. PCRE also supports
this notation. For example,
.sp
  [01[:alpha:]%]
.sp
matches "0", "1", any alphabetic character, or "%". The supported class names
are
.sp
  alnum    letters and digits
  alpha    letters
  ascii    character codes 0 - 127
  blank    space or tab only
  cntrl    control characters
  digit    decimal digits (same as \ed)
  graph    printing characters, excluding space
  lower    lower case letters
  print    printing characters, including space
  punct    printing characters, excluding letters and digits
  space    white space (not quite the same as \es)
  upper    upper case letters
  word     "word" characters (same as \ew)
  xdigit   hexadecimal digits
.sp
The "space" characters are HT (9), LF (10), VT (11), FF (12), CR (13), and
space (32). Notice that this list includes the VT character (code 11). This
makes "space" different to \es, which does not include VT (for Perl
compatibility).
.P
The name "word" is a Perl extension, and "blank" is a GNU extension from Perl
5.8. Another Perl extension is negation, which is indicated by a ^ character
after the colon. For example,
.sp
  [12[:^digit:]]
.sp
matches "1", "2", or any non-digit. PCRE (and Perl) also recognize the POSIX
syntax [.ch.] and [=ch=] where "ch" is a "collating element", but these are not
supported, and an error is given if they are encountered.
.P
In UTF-8 mode, characters with values greater than 128 do not match any of
the POSIX character classes.
.
.
.SH "VERTICAL BAR"
.rs
.sp
Vertical bar characters are used to separate alternative patterns. For example,
the pattern
.sp
  gilbert|sullivan
.sp
matches either "gilbert" or "sullivan". Any number of alternatives may appear,
and an empty alternative is permitted (matching the empty string). The matching
process tries each alternative in turn, from left to right, and the first one
that succeeds is used. If the alternatives are within a subpattern
.\" HTML <a href="#subpattern">
.\" </a>
(defined below),
.\"
"succeeds" means matching the rest of the main pattern as well as the
alternative in the subpattern.
.
.
.SH "INTERNAL OPTION SETTING"
.rs
.sp
The settings of the PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and
PCRE_EXTENDED options (which are Perl-compatible) can be changed from within
the pattern by a sequence of Perl option letters enclosed between "(?" and ")".
The option letters are
.sp
  i  for PCRE_CASELESS
  m  for PCRE_MULTILINE
  s  for PCRE_DOTALL
  x  for PCRE_EXTENDED
.sp
For example, (?im) sets caseless, multiline matching. It is also possible to
unset these options by preceding the letter with a hyphen, and a combined
setting and unsetting such as (?im-sx), which sets PCRE_CASELESS and
PCRE_MULTILINE while unsetting PCRE_DOTALL and PCRE_EXTENDED, is also
permitted. If a letter appears both before and after the hyphen, the option is
unset.
.P
The PCRE-specific options PCRE_DUPNAMES, PCRE_UNGREEDY, and PCRE_EXTRA can be
changed in the same way as the Perl-compatible options by using the characters
J, U and X respectively.
.P
When one of these option changes occurs at top level (that is, not inside
subpattern parentheses), the change applies to the remainder of the pattern
that follows. If the change is placed right at the start of a pattern, PCRE
extracts it into the global options (and it will therefore show up in data
extracted by the \fBpcre_fullinfo()\fP function).
.P
An option change within a subpattern (see below for a description of
subpatterns) affects only that part of the current pattern that follows it, so
.sp
  (a(?i)b)c
.sp
matches abc and aBc and no other strings (assuming PCRE_CASELESS is not used).
By this means, options can be made to have different settings in different
parts of the pattern. Any changes made in one alternative do carry on
into subsequent branches within the same subpattern. For example,
.sp
  (a(?i)b|c)
.sp
matches "ab", "aB", "c", and "C", even though when matching "C" the first
branch is abandoned before the option setting. This is because the effects of
option settings happen at compile time. There would be some very weird
behaviour otherwise.
.P
\fBNote:\fP There are other PCRE-specific options that can be set by the
application when the compile or match functions are called. In some cases the
pattern can contain special leading sequences such as (*CRLF) to override what
the application has set or what has been defaulted. Details are given in the
section entitled
.\" HTML <a href="#newlineseq">
.\" </a>
"Newline sequences"
.\"
above. There is also the (*UTF8) leading sequence that can be used to set UTF-8
mode; this is equivalent to setting the PCRE_UTF8 option.
.
.
.\" HTML <a name="subpattern"></a>
.SH SUBPATTERNS
.rs
.sp
Subpatterns are delimited by parentheses (round brackets), which can be nested.
Turning part of a pattern into a subpattern does two things:
.sp
1. It localizes a set of alternatives. For example, the pattern
.sp
  cat(aract|erpillar|)
.sp
matches one of the words "cat", "cataract", or "caterpillar". Without the
parentheses, it would match "cataract", "erpillar" or an empty string.
.sp
2. It sets up the subpattern as a capturing subpattern. This means that, when
the whole pattern matches, that portion of the subject string that matched the
subpattern is passed back to the caller via the \fIovector\fP argument of
\fBpcre_exec()\fP. Opening parentheses are counted from left to right (starting
from 1) to obtain numbers for the capturing subpatterns.
.P
For example, if the string "the red king" is matched against the pattern
.sp
  the ((red|white) (king|queen))
.sp
the captured substrings are "red king", "red", and "king", and are numbered 1,
2, and 3, respectively.
.P
The fact that plain parentheses fulfil two functions is not always helpful.
There are often times when a grouping subpattern is required without a
capturing requirement. If an opening parenthesis is followed by a question mark
and a colon, the subpattern does not do any capturing, and is not counted when
computing the number of any subsequent capturing subpatterns. For example, if
the string "the white queen" is matched against the pattern
.sp
  the ((?:red|white) (king|queen))
.sp
the captured substrings are "white queen" and "queen", and are numbered 1 and
2. The maximum number of capturing subpatterns is 65535.
.P
As a convenient shorthand, if any option settings are required at the start of
a non-capturing subpattern, the option letters may appear between the "?" and
the ":". Thus the two patterns
.sp
  (?i:saturday|sunday)
  (?:(?i)saturday|sunday)
.sp
match exactly the same set of strings. Because alternative branches are tried
from left to right, and options are not reset until the end of the subpattern
is reached, an option setting in one branch does affect subsequent branches, so
the above patterns match "SUNDAY" as well as "Saturday".
.
.
.\" HTML <a name="dupsubpatternnumber"></a>
.SH "DUPLICATE SUBPATTERN NUMBERS"
.rs
.sp
Perl 5.10 introduced a feature whereby each alternative in a subpattern uses
the same numbers for its capturing parentheses. Such a subpattern starts with
(?| and is itself a non-capturing subpattern. For example, consider this
pattern:
.sp
  (?|(Sat)ur|(Sun))day
.sp
Because the two alternatives are inside a (?| group, both sets of capturing
parentheses are numbered one. Thus, when the pattern matches, you can look
at captured substring number one, whichever alternative matched. This construct
is useful when you want to capture part, but not all, of one of a number of
alternatives. Inside a (?| group, parentheses are numbered as usual, but the
number is reset at the start of each branch. The numbers of any capturing
buffers that follow the subpattern start after the highest number used in any
branch. The following example is taken from the Perl documentation.
The numbers underneath show in which buffer the captured content will be
stored.
.sp
  # before  ---------------branch-reset----------- after
  / ( a )  (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
  # 1            2         2  3        2     3     4
.sp
A back reference to a numbered subpattern uses the most recent value that is
set for that number by any subpattern. The following pattern matches "abcabc"
or "defdef":
.sp
  /(?|(abc)|(def))\e1/
.sp
In contrast, a recursive or "subroutine" call to a numbered subpattern always
refers to the first one in the pattern with the given number. The following
pattern matches "abcabc" or "defabc":
.sp
  /(?|(abc)|(def))(?1)/
.sp
If a
.\" HTML <a href="#conditions">
.\" </a>
condition test
.\"
for a subpattern's having matched refers to a non-unique number, the test is
true if any of the subpatterns of that number have matched.
.P
An alternative approach to using this "branch reset" feature is to use
duplicate named subpatterns, as described in the next section.
.
.
.SH "NAMED SUBPATTERNS"
.rs
.sp
Identifying capturing parentheses by number is simple, but it can be very hard
to keep track of the numbers in complicated regular expressions. Furthermore,
if an expression is modified, the numbers may change. To help with this
difficulty, PCRE supports the naming of subpatterns. This feature was not
added to Perl until release 5.10. Python had the feature earlier, and PCRE
introduced it at release 4.0, using the Python syntax. PCRE now supports both
the Perl and the Python syntax. Perl allows identically numbered subpatterns to
have different names, but PCRE does not.
.P
In PCRE, a subpattern can be named in one of three ways: (?<name>...) or
(?'name'...) as in Perl, or (?P<name>...) as in Python. References to capturing
parentheses from other parts of the pattern, such as
.\" HTML <a href="#backreferences">
.\" </a>
back references,
.\"
.\" HTML <a href="#recursion">
.\" </a>
recursion,
.\"
and
.\" HTML <a href="#conditions">
.\" </a>
conditions,
.\"
can be made by name as well as by number.
.P
Names consist of up to 32 alphanumeric characters and underscores. Named
capturing parentheses are still allocated numbers as well as names, exactly as
if the names were not present. The PCRE API provides function calls for
extracting the name-to-number translation table from a compiled pattern. There
is also a convenience function for extracting a captured substring by name.
.P
By default, a name must be unique within a pattern, but it is possible to relax
this constraint by setting the PCRE_DUPNAMES option at compile time. (Duplicate
names are also always permitted for subpatterns with the same number, set up as
described in the previous section.) Duplicate names can be useful for patterns
where only one instance of the named parentheses can match. Suppose you want to
match the name of a weekday, either as a 3-letter abbreviation or as the full
name, and in both cases you want to extract the abbreviation. This pattern
(ignoring the line breaks) does the job:
.sp
  (?<DN>Mon|Fri|Sun)(?:day)?|
  (?<DN>Tue)(?:sday)?|
  (?<DN>Wed)(?:nesday)?|
  (?<DN>Thu)(?:rsday)?|
  (?<DN>Sat)(?:urday)?
.sp
There are five capturing substrings, but only one is ever set after a match.
(An alternative way of solving this problem is to use a "branch reset"
subpattern, as described in the previous section.)
.P
The convenience function for extracting the data by name returns the substring
for the first (and in this example, the only) subpattern of that name that
matched. This saves searching to find which numbered subpattern it was.
.P
If you make a back reference to a non-unique named subpattern from elsewhere in
the pattern, the one that corresponds to the first occurrence of the name is
used. In the absence of duplicate numbers (see the previous section) this is
the one with the lowest number. If you use a named reference in a condition
test (see the
.\"
.\" HTML <a href="#conditions">
.\" </a>
section about conditions
.\"
below), either to check whether a subpattern has matched, or to check for
recursion, all subpatterns with the same name are tested. If the condition is
true for any one of them, the overall condition is true. This is the same
behaviour as testing by number. For further details of the interfaces for
handling named subpatterns, see the
.\" HREF
\fBpcreapi\fP
.\"
documentation.
.P
\fBWarning:\fP You cannot use different names to distinguish between two
subpatterns with the same number because PCRE uses only the numbers when
matching. For this reason, an error is given at compile time if different names
are given to subpatterns with the same number. However, you can give the same
name to subpatterns with the same number, even when PCRE_DUPNAMES is not set.
.
.
.SH REPETITION
.rs
.sp
Repetition is specified by quantifiers, which can follow any of the following
items:
.sp
  a literal data character
  the dot metacharacter
  the \eC escape sequence
  the \eX escape sequence (in UTF-8 mode with Unicode properties)
  the \eR escape sequence
  an escape such as \ed that matches a single character
  a character class
  a back reference (see next section)
  a parenthesized subpattern (unless it is an assertion)
  a recursive or "subroutine" call to a subpattern
.sp
The general repetition quantifier specifies a minimum and maximum number of
permitted matches, by giving the two numbers in curly brackets (braces),
separated by a comma. The numbers must be less than 65536, and the first must
be less than or equal to the second. For example:
.sp
  z{2,4}
.sp
matches "zz", "zzz", or "zzzz". A closing brace on its own is not a special
character. If the second number is omitted, but the comma is present, there is
no upper limit; if the second number and the comma are both omitted, the
quantifier specifies an exact number of required matches. Thus
.sp
  [aeiou]{3,}
.sp
matches at least 3 successive vowels, but may match many more, while
.sp
  \ed{8}
.sp
matches exactly 8 digits. An opening curly bracket that appears in a position
where a quantifier is not allowed, or one that does not match the syntax of a
quantifier, is taken as a literal character. For example, {,6} is not a
quantifier, but a literal string of four characters.
.P
In UTF-8 mode, quantifiers apply to UTF-8 characters rather than to individual
bytes. Thus, for example, \ex{100}{2} matches two UTF-8 characters, each of
which is represented by a two-byte sequence. Similarly, when Unicode property
support is available, \eX{3} matches three Unicode extended sequences, each of
which may be several bytes long (and they may be of different lengths).
.P
The quantifier {0} is permitted, causing the expression to behave as if the
previous item and the quantifier were not present. This may be useful for
subpatterns that are referenced as
.\" HTML <a href="#subpatternsassubroutines">
.\" </a>
subroutines
.\"
from elsewhere in the pattern. Items other than subpatterns that have a {0}
quantifier are omitted from the compiled pattern.
.P
For convenience, the three most common quantifiers have single-character
abbreviations:
.sp
  *    is equivalent to {0,}
  +    is equivalent to {1,}
  ?    is equivalent to {0,1}
.sp
It is possible to construct infinite loops by following a subpattern that can
match no characters with a quantifier that has no upper limit, for example:
.sp
  (a?)*
.sp
Earlier versions of Perl and PCRE used to give an error at compile time for
such patterns. However, because there are cases where this can be useful, such
patterns are now accepted, but if any repetition of the subpattern does in fact
match no characters, the loop is forcibly broken.
.P
By default, the quantifiers are "greedy", that is, they match as much as
possible (up to the maximum number of permitted times), without causing the
rest of the pattern to fail. The classic example of where this gives problems
is in trying to match comments in C programs. These appear between /* and */
and within the comment, individual * and / characters may appear. An attempt to
match C comments by applying the pattern
.sp
  /\e*.*\e*/
.sp
to the string
.sp
  /* first comment */  not comment  /* second comment */
.sp
fails, because it matches the entire string owing to the greediness of the .*
item.
.P
However, if a quantifier is followed by a question mark, it ceases to be
greedy, and instead matches the minimum number of times possible, so the
pattern
.sp
  /\e*.*?\e*/
.sp
does the right thing with the C comments. The meaning of the various
quantifiers is not otherwise changed, just the preferred number of matches.
Do not confuse this use of question mark with its use as a quantifier in its
own right. Because it has two uses, it can sometimes appear doubled, as in
.sp
  \ed??\ed
.sp
which matches one digit by preference, but can match two if that is the only
way the rest of the pattern matches.
.P
If the PCRE_UNGREEDY option is set (an option that is not available in Perl),
the quantifiers are not greedy by default, but individual ones can be made
greedy by following them with a question mark. In other words, it inverts the
default behaviour.
.P
When a parenthesized subpattern is quantified with a minimum repeat count that
is greater than 1 or with a limited maximum, more memory is required for the
compiled pattern, in proportion to the size of the minimum or maximum.
.P
If a pattern starts with .* or .{0,} and the PCRE_DOTALL option (equivalent
to Perl's /s) is set, thus allowing the dot to match newlines, the pattern is
implicitly anchored, because whatever follows will be tried against every
character position in the subject string, so there is no point in retrying the
overall match at any position after the first. PCRE normally treats such a
pattern as though it were preceded by \eA.
.P
In cases where it is known that the subject string contains no newlines, it is
worth setting PCRE_DOTALL in order to obtain this optimization, or
alternatively using ^ to indicate anchoring explicitly.
.P
However, there is one situation where the optimization cannot be used. When .*
is inside capturing parentheses that are the subject of a back reference
elsewhere in the pattern, a match at the start may fail where a later one
succeeds. Consider, for example:
.sp
  (.*)abc\e1
.sp
If the subject is "xyz123abc123" the match point is the fourth character. For
this reason, such a pattern is not implicitly anchored.
.P
When a capturing subpattern is repeated, the value captured is the substring
that matched the final iteration. For example, after
.sp
  (tweedle[dume]{3}\es*)+
.sp
has matched "tweedledum tweedledee" the value of the captured substring is
"tweedledee". However, if there are nested capturing subpatterns, the
corresponding captured values may have been set in previous iterations. For
example, after
.sp
  /(a|(b))+/
.sp
matches "aba" the value of the second captured substring is "b".
.
.
.\" HTML <a name="atomicgroup"></a>
.SH "ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS"
.rs
.sp
With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy")
repetition, failure of what follows normally causes the repeated item to be
re-evaluated to see if a different number of repeats allows the rest of the
pattern to match. Sometimes it is useful to prevent this, either to change the
nature of the match, or to cause it to fail earlier than it otherwise might, when
the author of the pattern knows there is no point in carrying on.
.P
Consider, for example, the pattern \ed+foo when applied to the subject line
.sp
  123456bar
.sp
After matching all 6 digits and then failing to match "foo", the normal
action of the matcher is to try again with only 5 digits matching the \ed+
item, and then with 4, and so on, before ultimately failing. "Atomic grouping"
(a term taken from Jeffrey Friedl's book) provides the means for specifying
that once a subpattern has matched, it is not to be re-evaluated in this way.
.P
If we use atomic grouping for the previous example, the matcher gives up
immediately on failing to match "foo" the first time. The notation is a kind of
special parenthesis, starting with (?> as in this example:
.sp
  (?>\ed+)foo
.sp
This kind of parenthesis "locks up" the part of the pattern it contains once
it has matched, and a failure further into the pattern is prevented from
backtracking into it. Backtracking past it to previous items, however, works as
normal.
.P
An alternative description is that a subpattern of this type matches the string
of characters that an identical standalone pattern would match, if anchored at
the current point in the subject string.
.P
Atomic grouping subpatterns are not capturing subpatterns. Simple cases such as
the above example can be thought of as a maximizing repeat that must swallow
everything it can. So, while both \ed+ and \ed+? are prepared to adjust the
number of digits they match in order to make the rest of the pattern match,
(?>\ed+) can only match an entire sequence of digits.
.P
Atomic groups in general can of course contain arbitrarily complicated
subpatterns, and can be nested. However, when the subpattern for an atomic
group is just a single repeated item, as in the example above, a simpler
notation, called a "possessive quantifier" can be used. This consists of an
additional + character following a quantifier. Using this notation, the
previous example can be rewritten as
.sp
  \ed++foo
.sp
Note that a possessive quantifier can be used with an entire group, for
example:
.sp
  (abc|xyz){2,3}+
.sp
Possessive quantifiers are always greedy; the setting of the PCRE_UNGREEDY
option is ignored. They are a convenient notation for the simpler forms of
atomic group. However, there is no difference in the meaning of a possessive
quantifier and the equivalent atomic group, though there may be a performance
difference; possessive quantifiers should be slightly faster.
.P
The possessive quantifier syntax is an extension to the Perl 5.8 syntax.
Jeffrey Friedl originated the idea (and the name) in the first edition of his
book. Mike McCloskey liked it, so implemented it when he built Sun's Java
package, and PCRE copied it from there. It ultimately found its way into Perl
at release 5.10.
.P
PCRE has an optimization that automatically "possessifies" certain simple
pattern constructs. For example, the sequence A+B is treated as A++B because
there is no point in backtracking into a sequence of A's when B must follow.
.P
When a pattern contains an unlimited repeat inside a subpattern that can itself
be repeated an unlimited number of times, the use of an atomic group is the
only way to avoid some failing matches taking a very long time indeed. The
pattern
.sp
  (\eD+|<\ed+>)*[!?]
.sp
matches an unlimited number of substrings that either consist of non-digits, or
digits enclosed in <>, followed by either ! or ?. When it matches, it runs
quickly. However, if it is applied to
.sp
  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
.sp
it takes a long time before reporting failure. This is because the string can
be divided between the internal \eD+ repeat and the external * repeat in a
large number of ways, and all have to be tried. (The example uses [!?] rather
than a single character at the end, because both PCRE and Perl have an
optimization that allows for fast failure when a single character is used. They
remember the last single character that is required for a match, and fail early
if it is not present in the string.) If the pattern is changed so that it uses
an atomic group, like this:
.sp
  ((?>\eD+)|<\ed+>)*[!?]
.sp
sequences of non-digits cannot be broken, and failure happens quickly.
.
.
.\" HTML <a name="backreferences"></a>
.SH "BACK REFERENCES"
.rs
.sp
Outside a character class, a backslash followed by a digit greater than 0 (and
possibly further digits) is a back reference to a capturing subpattern earlier
(that is, to its left) in the pattern, provided there have been that many
previous capturing left parentheses.
.P
However, if the decimal number following the backslash is less than 10, it is
always taken as a back reference, and causes an error only if there are not
that many capturing left parentheses in the entire pattern. In other words, the
parentheses that are referenced need not be to the left of the reference for
numbers less than 10. A "forward back reference" of this type can make sense
when a repetition is involved and the subpattern to the right has participated
in an earlier iteration.
.P
It is not possible to have a numerical "forward back reference" to a subpattern
whose number is 10 or more using this syntax because a sequence such as \e50 is
interpreted as a character defined in octal. See the subsection entitled
"Non-printing characters"
.\" HTML <a href="#digitsafterbackslash">
.\" </a>
above
.\"
for further details of the handling of digits following a backslash. There is
no such problem when named parentheses are used. A back reference to any
subpattern is possible using named parentheses (see below).
.P
Another way of avoiding the ambiguity inherent in the use of digits following a
backslash is to use the \eg escape sequence, which is a feature introduced in
Perl 5.10. This escape must be followed by an unsigned number or a negative
number, optionally enclosed in braces. These examples are all identical:
.sp
  (ring), \e1
  (ring), \eg1
  (ring), \eg{1}
.sp
An unsigned number specifies an absolute reference without the ambiguity that
is present in the older syntax. It is also useful when literal digits follow
the reference. A negative number is a relative reference. Consider this
example:
.sp
  (abc(def)ghi)\eg{-1}
.sp
The sequence \eg{-1} is a reference to the most recently started capturing
subpattern before \eg, that is, it is equivalent to \e2. Similarly, \eg{-2}
would be equivalent to \e1. The use of relative references can be helpful in
long patterns, and also in patterns that are created by joining together
fragments that contain references within themselves.
.P
A back reference matches whatever actually matched the capturing subpattern in
the current subject string, rather than anything matching the subpattern
itself (see
.\" HTML <a href="#subpatternsassubroutines">
.\" </a>
"Subpatterns as subroutines"
.\"
below for a way of doing that). So the pattern
.sp
  (sens|respons)e and \e1ibility
.sp
matches "sense and sensibility" and "response and responsibility", but not
"sense and responsibility". If caseful matching is in force at the time of the
back reference, the case of letters is relevant. For example,
.sp
  ((?i)rah)\es+\e1
.sp
matches "rah rah" and "RAH RAH", but not "RAH rah", even though the original
capturing subpattern is matched caselessly.
.P
There are several different ways of writing back references to named
subpatterns. The .NET syntax \ek{name} and the Perl syntax \ek<name> or
\ek'name' are supported, as is the Python syntax (?P=name). Perl 5.10's unified
back reference syntax, in which \eg can be used for both numeric and named
references, is also supported. We could rewrite the above example in any of
the following ways:
.sp
  (?<p1>(?i)rah)\es+\ek<p1>
  (?'p1'(?i)rah)\es+\ek{p1}
  (?P<p1>(?i)rah)\es+(?P=p1)
  (?<p1>(?i)rah)\es+\eg{p1}
.sp
A subpattern that is referenced by name may appear in the pattern before or
after the reference.
.P
There may be more than one back reference to the same subpattern. If a
subpattern has not actually been used in a particular match, any back
references to it always fail by default. For example, the pattern
.sp
  (a|(bc))\e2
.sp
always fails if it starts to match "a" rather than "bc". However, if the
PCRE_JAVASCRIPT_COMPAT option is set at compile time, a back reference to an
unset value matches an empty string.
.P
Because there may be many capturing parentheses in a pattern, all digits
following a backslash are taken as part of a potential back reference number.
If the pattern continues with a digit character, some delimiter must be used to
terminate the back reference. If the PCRE_EXTENDED option is set, this can be
whitespace. Otherwise, the \eg{ syntax or an empty comment (see
.\" HTML <a href="#comments">
.\" </a>
"Comments"
.\"
below) can be used.
.
.SS "Recursive back references"
.rs
.sp
A back reference that occurs inside the parentheses to which it refers fails
when the subpattern is first used, so, for example, (a\e1) never matches.
However, such references can be useful inside repeated subpatterns. For
example, the pattern
.sp
  (a|b\e1)+
.sp
matches any number of "a"s and also "aba", "ababbaa" etc. At each iteration of
the subpattern, the back reference matches the character string corresponding
to the previous iteration. In order for this to work, the pattern must be such
that the first iteration does not need to match the back reference. This can be
done using alternation, as in the example above, or by a quantifier with a
minimum of zero.
.P
Back references of this type cause the group that they reference to be treated
as an
.\" HTML <a href="#atomicgroup">
.\" </a>
atomic group.
.\"
Once the whole group has been matched, a subsequent matching failure cannot
cause backtracking into the middle of the group.
.
.
.\" HTML <a name="bigassertions"></a>
.SH ASSERTIONS
.rs
.sp
An assertion is a test on the characters following or preceding the current
matching point that does not actually consume any characters. The simple
assertions coded as \eb, \eB, \eA, \eG, \eZ, \ez, ^ and $ are described
.\" HTML <a href="#smallassertions">
.\" </a>
above.
.\"
.P
More complicated assertions are coded as subpatterns. There are two kinds:
those that look ahead of the current position in the subject string, and those
that look behind it. An assertion subpattern is matched in the normal way,
except that it does not cause the current matching position to be changed.
.P
Assertion subpatterns are not capturing subpatterns, and may not be repeated,
because it makes no sense to assert the same thing several times. If any kind
of assertion contains capturing subpatterns within it, these are counted for
the purposes of numbering the capturing subpatterns in the whole pattern.
However, substring capturing is carried out only for positive assertions,
because it does not make sense for negative assertions.
.
.
.SS "Lookahead assertions"
.rs
.sp
Lookahead assertions start with (?= for positive assertions and (?! for
negative assertions. For example,
.sp
  \ew+(?=;)
.sp
matches a word followed by a semicolon, but does not include the semicolon in
the match, and
.sp
  foo(?!bar)
.sp
matches any occurrence of "foo" that is not followed by "bar". Note that the
apparently similar pattern
.sp
  (?!foo)bar
.sp
does not find an occurrence of "bar" that is preceded by something other than
"foo"; it finds any occurrence of "bar" whatsoever, because the assertion
(?!foo) is always true when the next three characters are "bar". A
lookbehind assertion is needed to achieve the other effect.
.P
If you want to force a matching failure at some point in a pattern, the most
convenient way to do it is with (?!) because an empty string always matches, so
an assertion that requires there not to be an empty string must always fail.
The Perl 5.10 backtracking control verb (*FAIL) or (*F) is essentially a
synonym for (?!).
.
.
.\" HTML <a name="lookbehind"></a>
.SS "Lookbehind assertions"
.rs
.sp
Lookbehind assertions start with (?<= for positive assertions and (?<! for
negative assertions. For example,
.sp
  (?<!foo)bar
.sp
does find an occurrence of "bar" that is not preceded by "foo". The contents of
a lookbehind assertion are restricted such that all the strings it matches must
have a fixed length. However, if there are several top-level alternatives, they
do not all have to have the same fixed length. Thus
.sp
  (?<=bullock|donkey)
.sp
is permitted, but
.sp
  (?<!dogs?|cats?)
.sp
causes an error at compile time. Branches that match different length strings
are permitted only at the top level of a lookbehind assertion. This is an
extension compared with Perl (5.8 and 5.10), which requires all branches to
match the same length of string. An assertion such as
.sp
  (?<=ab(c|de))
.sp
is not permitted, because its single top-level branch can match two different
lengths, but it is acceptable to PCRE if rewritten to use two top-level
branches:
.sp
  (?<=abc|abde)
.sp
In some cases, the Perl 5.10 escape sequence \eK
.\" HTML <a href="#resetmatchstart">
.\" </a>
(see above)
.\"
can be used instead of a lookbehind assertion to get round the fixed-length
restriction.
.P
The implementation of lookbehind assertions is, for each alternative, to
temporarily move the current position back by the fixed length and then try to
match. If there are insufficient characters before the current position, the
assertion fails.
.P
PCRE does not allow the \eC escape (which matches a single byte in UTF-8 mode)
to appear in lookbehind assertions, because it makes it impossible to calculate
the length of the lookbehind. The \eX and \eR escapes, which can match
different numbers of bytes, are also not permitted.
.P
.\" HTML <a href="#subpatternsassubroutines">
.\" </a>
"Subroutine"
.\"
calls (see below) such as (?2) or (?&X) are permitted in lookbehinds, as long
as the subpattern matches a fixed-length string.
.\" HTML <a href="#recursion">
.\" </a>
Recursion,
.\"
however, is not supported.
.P
Possessive quantifiers can be used in conjunction with lookbehind assertions to
specify efficient matching of fixed-length strings at the end of subject
strings. Consider a simple pattern such as
.sp
  abcd$
.sp
when applied to a long string that does not match. Because matching proceeds
from left to right, PCRE will look for each "a" in the subject and then see if
what follows matches the rest of the pattern. If the pattern is specified as
.sp
  ^.*abcd$
.sp
the initial .* matches the entire string at first, but when this fails (because
there is no following "a"), it backtracks to match all but the last character,
then all but the last two characters, and so on. Once again the search for "a"
covers the entire string, from right to left, so we are no better off. However,
if the pattern is written as
.sp
  ^.*+(?<=abcd)
.sp
there can be no backtracking for the .*+ item; it can match only the entire
string. The subsequent lookbehind assertion does a single test on the last four
characters. If it fails, the match fails immediately. For long strings, this
approach makes a significant difference to the processing time.
.
.
.SS "Using multiple assertions"
.rs
.sp
Several assertions (of any sort) may occur in succession. For example,
.sp
  (?<=\ed{3})(?<!999)foo
.sp
matches "foo" preceded by three digits that are not "999". Notice that each of
the assertions is applied independently at the same point in the subject
string. First there is a check that the previous three characters are all
digits, and then there is a check that the same three characters are not "999".
This pattern does \fInot\fP match "foo" preceded by six characters, the first
three of which are digits and the last three of which are not "999". For
example, it doesn't match "123abcfoo". A pattern to do that is
.sp
  (?<=\ed{3}...)(?<!999)foo
.sp
This time the first assertion looks at the preceding six characters, checking
that the first three are digits, and then the second assertion checks that the
preceding three characters are not "999".
.P
Assertions can be nested in any combination. For example,
.sp
  (?<=(?<!foo)bar)baz
.sp
matches an occurrence of "baz" that is preceded by "bar" which in turn is not
preceded by "foo", while
.sp
  (?<=\ed{3}(?!999)...)foo
.sp
is another pattern that matches "foo" preceded by three digits and any three
characters that are not "999".
.
.
.\" HTML <a name="conditions"></a>
.SH "CONDITIONAL SUBPATTERNS"
.rs
.sp
It is possible to cause the matching process to obey a subpattern
conditionally or to choose between two alternative subpatterns, depending on
the result of an assertion, or whether a specific capturing subpattern has
already been matched. The two possible forms of conditional subpattern are:
.sp
  (?(condition)yes-pattern)
  (?(condition)yes-pattern|no-pattern)
.sp
If the condition is satisfied, the yes-pattern is used; otherwise the
no-pattern (if present) is used. If there are more than two alternatives in the
subpattern, a compile-time error occurs.
.P
There are four kinds of condition: references to subpatterns, references to
recursion, a pseudo-condition called DEFINE, and assertions.
.
.SS "Checking for a used subpattern by number"
.rs
.sp
If the text between the parentheses consists of a sequence of digits, the
condition is true if a capturing subpattern of that number has previously
matched. If there is more than one capturing subpattern with the same number
(see the earlier
.\"
.\" HTML <a href="#recursion">
.\" </a>
section about duplicate subpattern numbers),
.\"
the condition is true if any of them have been set. An alternative notation is
to precede the digits with a plus or minus sign. In this case, the subpattern
number is relative rather than absolute. The most recently opened parentheses
can be referenced by (?(-1), the next most recent by (?(-2), and so on. In
looping constructs it can also make sense to refer to subsequent groups with
constructs such as (?(+2).
.P
Consider the following pattern, which contains non-significant white space to
make it more readable (assume the PCRE_EXTENDED option) and to divide it into
three parts for ease of discussion:
.sp
  ( \e( )?    [^()]+    (?(1) \e) )
.sp
The first part matches an optional opening parenthesis, and if that
character is present, sets it as the first captured substring. The second part
matches one or more characters that are not parentheses. The third part is a
conditional subpattern that tests whether the first set of parentheses matched
or not. If they did, that is, if the subject started with an opening parenthesis,
the condition is true, and so the yes-pattern is executed and a closing
parenthesis is required. Otherwise, since no-pattern is not present, the
subpattern matches nothing. In other words, this pattern matches a sequence of
non-parentheses, optionally enclosed in parentheses.
.P
If you were embedding this pattern in a larger one, you could use a relative
reference:
.sp
  ...other stuff... ( \e( )?    [^()]+    (?(-1) \e) ) ...
.sp
This makes the fragment independent of the parentheses in the larger pattern.
.
.SS "Checking for a used subpattern by name"
.rs
.sp
Perl uses the syntax (?(<name>)...) or (?('name')...) to test for a used
subpattern by name. For compatibility with earlier versions of PCRE, which had
this facility before Perl, the syntax (?(name)...) is also recognized. However,
there is a possible ambiguity with this syntax, because subpattern names may
consist entirely of digits. PCRE looks first for a named subpattern; if it
cannot find one and the name consists entirely of digits, PCRE looks for a
subpattern of that number, which must be greater than zero. Using subpattern
names that consist entirely of digits is not recommended.
.P
Rewriting the above example to use a named subpattern gives this:
.sp
  (?<OPEN> \e( )?    [^()]+    (?(<OPEN>) \e) )
.sp
If the name used in a condition of this kind is a duplicate, the test is
applied to all subpatterns of the same name, and is true if any one of them has
matched.
.
.SS "Checking for pattern recursion"
.rs
.sp
If the condition is the string (R), and there is no subpattern with the name R,
the condition is true if a recursive call to the whole pattern or any
subpattern has been made. If digits or a name preceded by ampersand follow the
letter R, for example:
.sp
  (?(R3)...) or (?(R&name)...)
.sp
the condition is true if the most recent recursion is into a subpattern whose
number or name is given. This condition does not check the entire recursion
stack. If the name used in a condition of this kind is a duplicate, the test is
applied to all subpatterns of the same name, and is true if any one of them is
the most recent recursion.
.P
At "top level", all these recursion test conditions are false.
.\" HTML <a href="#recursion">
.\" </a>
The syntax for recursive patterns
.\"
is described below.
.
.SS "Defining subpatterns for use by reference only"
.rs
.sp
If the condition is the string (DEFINE), and there is no subpattern with the
name DEFINE, the condition is always false. In this case, there may be only one
alternative in the subpattern. It is always skipped if control reaches this
point in the pattern; the idea of DEFINE is that it can be used to define
"subroutines" that can be referenced from elsewhere. (The use of
.\" HTML <a href="#subpatternsassubroutines">
.\" </a>
"subroutines"
.\"
is described below.) For example, a pattern to match an IPv4 address could be
written like this (ignore whitespace and line breaks):
.sp
  (?(DEFINE) (?<byte> 2[0-4]\ed | 25[0-5] | 1\ed\ed | [1-9]?\ed) )
  \eb (?&byte) (\e.(?&byte)){3} \eb
.sp
The first part of the pattern is a DEFINE group inside which another group
named "byte" is defined. This matches an individual component of an IPv4
address (a number less than 256). When matching takes place, this part of the
pattern is skipped because DEFINE acts like a false condition. The rest of the
pattern uses references to the named group to match the four dot-separated
components of an IPv4 address, insisting on a word boundary at each end.
.
.SS "Assertion conditions"
.rs
.sp
If the condition is not in any of the above formats, it must be an assertion.
This may be a positive or negative lookahead or lookbehind assertion. Consider
this pattern, again containing non-significant white space, and with the two
alternatives on the second line:
.sp
  (?(?=[^a-z]*[a-z])
  \ed{2}-[a-z]{3}-\ed{2}  |  \ed{2}-\ed{2}-\ed{2} )
.sp
The condition is a positive lookahead assertion that matches an optional
sequence of non-letters followed by a letter. In other words, it tests for the
presence of at least one letter in the subject. If a letter is found, the
subject is matched against the first alternative; otherwise it is matched
against the second. This pattern matches strings in one of the two forms
dd-aaa-dd or dd-dd-dd, where aaa are letters and dd are digits.
.
.
.\" HTML <a name="comments"></a>
.SH COMMENTS
.rs
.sp
The sequence (?# marks the start of a comment that continues up to the next
closing parenthesis. Nested parentheses are not permitted. The characters
that make up a comment play no part in the pattern matching at all.
.P
If the PCRE_EXTENDED option is set, an unescaped # character outside a
character class introduces a comment that continues to immediately after the
next newline in the pattern.
.
.
.\" HTML <a name="recursion"></a>
.SH "RECURSIVE PATTERNS"
.rs
.sp
Consider the problem of matching a string in parentheses, allowing for
unlimited nested parentheses. Without the use of recursion, the best that can
be done is to use a pattern that matches up to some fixed depth of nesting. It
is not possible to handle an arbitrary nesting depth.
.P
For some time, Perl has provided a facility that allows regular expressions to
recurse (amongst other things). It does this by interpolating Perl code in the
expression at run time, and the code can refer to the expression itself. A Perl
pattern using code interpolation to solve the parentheses problem can be
created like this:
.sp
  $re = qr{\e( (?: (?>[^()]+) | (?p{$re}) )* \e)}x;
.sp
The (?p{...}) item interpolates Perl code at run time, and in this case refers
recursively to the pattern in which it appears.
.P
Obviously, PCRE cannot support the interpolation of Perl code. Instead, it
supports special syntax for recursion of the entire pattern, and also for
individual subpattern recursion. After its introduction in PCRE and Python,
this kind of recursion was subsequently introduced into Perl at release 5.10.
.P
A special item that consists of (? followed by a number greater than zero and a
closing parenthesis is a recursive call of the subpattern of the given number,
provided that it occurs inside that subpattern. (If not, it is a
.\" HTML <a href="#subpatternsassubroutines">
.\" </a>
"subroutine"
.\"
call, which is described in the next section.) The special item (?R) or (?0) is
a recursive call of the entire regular expression.
.P
This PCRE pattern solves the nested parentheses problem (assume the
PCRE_EXTENDED option is set so that white space is ignored):
.sp
  \e( ( [^()]++ | (?R) )* \e)
.sp
First it matches an opening parenthesis. Then it matches any number of
substrings which can either be a sequence of non-parentheses, or a recursive
match of the pattern itself (that is, a correctly parenthesized substring).
Finally there is a closing parenthesis. Note the use of a possessive quantifier
to avoid backtracking into sequences of non-parentheses.
.P
If this were part of a larger pattern, you would not want to recurse the entire
pattern, so instead you could use this:
.sp
  ( \e( ( [^()]++ | (?1) )* \e) )
.sp
We have put the pattern into parentheses, and caused the recursion to refer to
them instead of the whole pattern.
.P
In a larger pattern, keeping track of parenthesis numbers can be tricky. This
is made easier by the use of relative references (a Perl 5.10 feature).
Instead of (?1) in the pattern above you can write (?-2) to refer to the second
most recently opened parentheses preceding the recursion. In other words, a
negative number counts capturing parentheses leftwards from the point at which
it is encountered.
.P
It is also possible to refer to subsequently opened parentheses, by writing
references such as (?+2). However, these cannot be recursive because the
reference is not inside the parentheses that are referenced. They are always
.\" HTML <a href="#subpatternsassubroutines">
.\" </a>
"subroutine"
.\"
calls, as described in the next section.
.P
An alternative approach is to use named parentheses instead. The Perl syntax
for this is (?&name); PCRE's earlier syntax (?P>name) is also supported. We
could rewrite the above example as follows:
.sp
  (?<pn> \e( ( [^()]++ | (?&pn) )* \e) )
.sp
If there is more than one subpattern with the same name, the earliest one is
used.
.P
This particular example pattern that we have been looking at contains nested
unlimited repeats, and so the use of a possessive quantifier for matching
strings of non-parentheses is important when applying the pattern to strings
that do not match. For example, when this pattern is applied to
.sp
  (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()
.sp
it yields "no match" quickly. However, if a possessive quantifier is not used,
the match runs for a very long time indeed because there are so many different
ways the + and * repeats can carve up the subject, and all have to be tested
before failure can be reported.
.P
At the end of a match, the values of capturing parentheses are those from
the outermost level. If you want to obtain intermediate values, a callout
function can be used (see below and the
.\" HREF
\fBpcrecallout\fP
.\"
documentation). If the pattern above is matched against
.sp
  (ab(cd)ef)
.sp
the value for the inner capturing parentheses (numbered 2) is "ef", which is
the last value taken on at the top level. If a capturing subpattern is not
matched at the top level, its final value is unset, even if it is (temporarily)
set at a deeper level.
.P
If there are more than 15 capturing parentheses in a pattern, PCRE has to
obtain extra memory to store data during a recursion, which it does by using
\fBpcre_malloc\fP, freeing it via \fBpcre_free\fP afterwards. If no memory can
be obtained, the match fails with the PCRE_ERROR_NOMEMORY error.
.P
Do not confuse the (?R) item with the condition (R), which tests for recursion.
Consider this pattern, which matches text in angle brackets, allowing for
arbitrary nesting. Only digits are allowed in nested brackets (that is, when
recursing), whereas any characters are permitted at the outer level.
.sp
  < (?: (?(R) \ed++  | [^<>]*+) | (?R)) * >
.sp
In this pattern, (?(R) is the start of a conditional subpattern, with two
different alternatives for the recursive and non-recursive cases. The (?R) item
is the actual recursive call.
.
.
.\" HTML <a name="recursiondifference"></a>
.SS "Recursion difference from Perl"
.rs
.sp
In PCRE (like Python, but unlike Perl), a recursive subpattern call is always
treated as an atomic group. That is, once it has matched some of the subject
string, it is never re-entered, even if it contains untried alternatives and
there is a subsequent matching failure. This can be illustrated by the
following pattern, which purports to match a palindromic string that contains
an odd number of characters (for example, "a", "aba", "abcba", "abcdcba"):
.sp
  ^(.|(.)(?1)\e2)$
.sp
The idea is that it either matches a single character, or two identical
characters surrounding a sub-palindrome. In Perl, this pattern works; in PCRE
it does not if the subject is longer than three characters. Consider the
subject string "abcba":
.P
At the top level, the first character is matched, but as it is not at the end
of the string, the first alternative fails; the second alternative is taken
and the recursion kicks in. The recursive call to subpattern 1 successfully
matches the next character ("b"). (Note that the beginning and end of line
tests are not part of the recursion).
.P
Back at the top level, the next character ("c") is compared with what
subpattern 2 matched, which was "a". This fails. Because the recursion is
treated as an atomic group, there are now no backtracking points, and so the
entire match fails. (Perl is able, at this point, to re-enter the recursion and
try the second alternative.) However, if the pattern is written with the
alternatives in the other order, things are different:
.sp
  ^((.)(?1)\e2|.)$
.sp
This time, the recursing alternative is tried first, and continues to recurse
until it runs out of characters, at which point the recursion fails. But this
time we do have another alternative to try at the higher level. That is the big
difference: in the previous case the remaining alternative is at a deeper
recursion level, which PCRE cannot use.
.P
To change the pattern so that it matches all palindromic strings, not just those
with an odd number of characters, it is tempting to change the pattern to this:
.sp
  ^((.)(?1)\e2|.?)$
.sp
Again, this works in Perl, but not in PCRE, and for the same reason. When a
deeper recursion has matched a single character, it cannot be entered again in
order to match an empty string. The solution is to separate the two cases, and
write out the odd and even cases as alternatives at the higher level:
.sp
  ^(?:((.)(?1)\e2|)|((.)(?3)\e4|.))
.sp
If you want to match typical palindromic phrases, the pattern has to ignore all
non-word characters, which can be done like this:
.sp
  ^\eW*+(?:((.)\eW*+(?1)\eW*+\e2|)|((.)\eW*+(?3)\eW*+\e4|\eW*+.\eW*+))\eW*+$
.sp
If run with the PCRE_CASELESS option, this pattern matches phrases such as "A
man, a plan, a canal: Panama!" and it works well in both PCRE and Perl. Note
the use of the possessive quantifier *+ to avoid backtracking into sequences of
non-word characters. Without this, PCRE takes a great deal longer (ten times or
more) to match typical phrases, and Perl takes so long that you think it has
gone into a loop.
.P
\fBWARNING\fP: The palindrome-matching patterns above work only if the subject
string does not start with a palindrome that is shorter than the entire string.
For example, although "abcba" is correctly matched, if the subject is "ababa",
PCRE finds the palindrome "aba" at the start, then fails at top level because
the end of the string does not follow. Once again, it cannot jump back into the
recursion to try other alternatives, so the entire match fails.
.
.
.\" HTML <a name="subpatternsassubroutines"></a>
.SH "SUBPATTERNS AS SUBROUTINES"
.rs
.sp
If the syntax for a recursive subpattern reference (either by number or by
name) is used outside the parentheses to which it refers, it operates like a
subroutine in a programming language. The "called" subpattern may be defined
before or after the reference. A numbered reference can be absolute or
relative, as in these examples:
.sp
  (...(absolute)...)...(?2)...
  (...(relative)...)...(?-1)...
  (...(?+1)...(relative)...
.sp
An earlier example pointed out that the pattern
.sp
  (sens|respons)e and \e1ibility
.sp
matches "sense and sensibility" and "response and responsibility", but not
"sense and responsibility". If instead the pattern
.sp
  (sens|respons)e and (?1)ibility
.sp
is used, it does match "sense and responsibility" as well as the other two
strings. Another example is given in the discussion of DEFINE above.
.P
Like recursive subpatterns, a subroutine call is always treated as an atomic
group. That is, once it has matched some of the subject string, it is never
re-entered, even if it contains untried alternatives and there is a subsequent
matching failure. Any capturing parentheses that are set during the subroutine
call revert to their previous values afterwards.
.P
When a subpattern is used as a subroutine, processing options such as
case-independence are fixed when the subpattern is defined. They cannot be
changed for different calls. For example, consider this pattern:
.sp
  (abc)(?i:(?-1))
.sp
It matches "abcabc". It does not match "abcABC" because the change of
processing option does not affect the called subpattern.
.
.
.\" HTML <a name="onigurumasubroutines"></a>
.SH "ONIGURUMA SUBROUTINE SYNTAX"
.rs
.sp
For compatibility with Oniguruma, the non-Perl syntax \eg followed by a name or
a number enclosed either in angle brackets or single quotes, is an alternative
syntax for referencing a subpattern as a subroutine, possibly recursively. Here
are two of the examples used above, rewritten using this syntax:
.sp
  (?<pn> \e( ( (?>[^()]+) | \eg<pn> )* \e) )
  (sens|respons)e and \eg'1'ibility
.sp
PCRE supports an extension to Oniguruma: if a number is preceded by a
plus or a minus sign it is taken as a relative reference. For example:
.sp
  (abc)(?i:\eg<-1>)
.sp
Note that \eg{...} (Perl syntax) and \eg<...> (Oniguruma syntax) are \fInot\fP
synonymous. The former is a back reference; the latter is a subroutine call.
.
.
.SH CALLOUTS
.rs
.sp
Perl has a feature whereby using the sequence (?{...}) causes arbitrary Perl
code to be obeyed in the middle of matching a regular expression. This makes it
possible, amongst other things, to extract different substrings that match the
same pair of parentheses when there is a repetition.
.P
PCRE provides a similar feature, but of course it cannot obey arbitrary Perl
code. The feature is called "callout". The caller of PCRE provides an external
function by putting its entry point in the global variable \fIpcre_callout\fP.
By default, this variable contains NULL, which disables all calling out.
.P
Within a regular expression, (?C) indicates the points at which the external
function is to be called. If you want to identify different callout points, you
can put a number less than 256 after the letter C. The default value is zero.
For example, this pattern has two callout points:
.sp
  (?C1)abc(?C2)def
.sp
If the PCRE_AUTO_CALLOUT flag is passed to \fBpcre_compile()\fP, callouts are
automatically installed before each item in the pattern. They are all numbered
255.
.P
During matching, when PCRE reaches a callout point (and \fIpcre_callout\fP is
set), the external function is called. It is provided with the number of the
callout, the position in the pattern, and, optionally, one item of data
originally supplied by the caller of \fBpcre_exec()\fP. The callout function
may cause matching to proceed, to backtrack, or to fail altogether. A complete
description of the interface to the callout function is given in the
.\" HREF
\fBpcrecallout\fP
.\"
documentation.
.
.
.SH "BACKTRACKING CONTROL"
.rs
.sp
Perl 5.10 introduced a number of "Special Backtracking Control Verbs", which
are described in the Perl documentation as "experimental and subject to change
or removal in a future version of Perl". It goes on to say: "Their usage in
production code should be noted to avoid problems during upgrades." The same
remarks apply to the PCRE features described in this section.
.P
Since these verbs are specifically related to backtracking, most of them can be
used only when the pattern is to be matched using \fBpcre_exec()\fP, which uses
a backtracking algorithm. With the exception of (*FAIL), which behaves like a
failing negative assertion, they cause an error if encountered by
\fBpcre_dfa_exec()\fP.
.P
If any of these verbs are used in an assertion or subroutine subpattern
(including recursive subpatterns), their effect is confined to that subpattern;
it does not extend to the surrounding pattern. Note that such subpatterns are
processed as anchored at the point where they are tested.
.P
The new verbs make use of what was previously invalid syntax: an opening
parenthesis followed by an asterisk. In Perl, they are generally of the form
(*VERB:ARG) but PCRE does not support the use of arguments, so its general
form is just (*VERB). Any number of these verbs may occur in a pattern. There
are two kinds:
.
.SS "Verbs that act immediately"
.rs
.sp
The following verbs act as soon as they are encountered:
.sp
   (*ACCEPT)
.sp
This verb causes the match to end successfully, skipping the remainder of the
pattern. When inside a recursion, only the innermost pattern is ended
immediately. If (*ACCEPT) is inside capturing parentheses, the data so far is
captured. (This feature was added to PCRE at release 8.00.) For example:
.sp
  A((?:A|B(*ACCEPT)|C)D)
.sp
This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is captured by
the outer parentheses.
.sp
  (*FAIL) or (*F)
.sp
This verb causes the match to fail, forcing backtracking to occur. It is
equivalent to (?!) but easier to read. The Perl documentation notes that it is
probably useful only when combined with (?{}) or (??{}). Those are, of course,
Perl features that are not present in PCRE. The nearest equivalent is the
callout feature, as for example in this pattern:
.sp
  a+(?C)(*FAIL)
.sp
A match with the string "aaaa" always fails, but the callout is taken before
each backtrack happens (in this example, 10 times).
.
.SS "Verbs that act after backtracking"
.rs
.sp
The following verbs do nothing when they are encountered. Matching continues
with what follows, but if there is no subsequent match, a failure is forced.
The verbs differ in exactly what kind of failure occurs.
.sp
  (*COMMIT)
.sp
This verb causes the whole match to fail outright if the rest of the pattern
does not match. Even if the pattern is unanchored, no further attempts to find
a match by advancing the starting point take place. Once (*COMMIT) has been
passed, \fBpcre_exec()\fP is committed to finding a match at the current
starting point, or not at all. For example:
.sp
  a+(*COMMIT)b
.sp
This matches "xxaab" but not "aacaab". It can be thought of as a kind of
dynamic anchor, or "I've started, so I must finish."
.sp
  (*PRUNE)
.sp
This verb causes the match to fail at the current position if the rest of the
pattern does not match. If the pattern is unanchored, the normal "bumpalong"
advance to the next starting character then happens. Backtracking can occur as
usual to the left of (*PRUNE), or when matching to the right of (*PRUNE), but
if there is no match to the right, backtracking cannot cross (*PRUNE).
In simple cases, the use of (*PRUNE) is just an alternative to an atomic
group or possessive quantifier, but there are some uses of (*PRUNE) that cannot
be expressed in any other way.
.sp
  (*SKIP)
.sp
This verb is like (*PRUNE), except that if the pattern is unanchored, the
"bumpalong" advance is not to the next character, but to the position in the
subject where (*SKIP) was encountered. (*SKIP) signifies that whatever text
was matched leading up to it cannot be part of a successful match. Consider:
.sp
  a+(*SKIP)b
.sp
If the subject is "aaaac...", after the first match attempt fails (starting at
the first character in the string), the starting point skips on to start the
next attempt at "c". Note that a possessive quantifier does not have the same
effect as this example; although it would suppress backtracking during the
first match attempt, the second attempt would start at the second character
instead of skipping on to "c".
.sp
  (*THEN)
.sp
This verb causes a skip to the next alternation if the rest of the pattern does
not match. That is, it cancels pending backtracking, but only within the
current alternation. Its name comes from the observation that it can be used
for a pattern-based if-then-else block:
.sp
  ( COND1 (*THEN) FOO | COND2 (*THEN) BAR | COND3 (*THEN) BAZ ) ...
.sp
If the COND1 pattern matches, FOO is tried (and possibly further items after
the end of the group if FOO succeeds); on failure the matcher skips to the
second alternative and tries COND2, without backtracking into COND1. If (*THEN)
is used outside of any alternation, it acts exactly like (*PRUNE).
.
.
.SH "SEE ALSO"
.rs
.sp
\fBpcreapi\fP(3), \fBpcrecallout\fP(3), \fBpcrematching\fP(3),
\fBpcresyntax\fP(3), \fBpcre\fP(3).
.
.
.SH AUTHOR
.rs
.sp
.nf
Philip Hazel
University Computing Service
Cambridge CB2 3QH, England.
.fi
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 06 March 2010
Copyright (c) 1997-2010 University of Cambridge.
.fi
usr/share/man/man3/pcrematching.3000064400000020216150403561440012623 0ustar00.TH PCREMATCHING 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PCRE MATCHING ALGORITHMS"
.rs
.sp
This document describes the two different algorithms that are available in PCRE
for matching a compiled regular expression against a given subject string. The
"standard" algorithm is the one provided by the \fBpcre_exec()\fP function.
This works in the same way as Perl's matching function, and provides a
Perl-compatible matching operation.
.P
An alternative algorithm is provided by the \fBpcre_dfa_exec()\fP function;
this operates in a different way, and is not Perl-compatible. It has advantages
and disadvantages compared with the standard algorithm, and these are described
below.
.P
When there is only one possible way in which a given subject string can match a
pattern, the two algorithms give the same answer. A difference arises, however,
when there are multiple possibilities. For example, if the pattern
.sp
  ^<.*>
.sp
is matched against the string
.sp
  <something> <something else> <something further>
.sp
there are three possible answers. The standard algorithm finds only one of
them, whereas the alternative algorithm finds all three.
.
.SH "REGULAR EXPRESSIONS AS TREES"
.rs
.sp
The set of strings that are matched by a regular expression can be represented
as a tree structure. An unlimited repetition in the pattern makes the tree of
infinite size, but it is still a tree. Matching the pattern to a given subject
string (from a given starting point) can be thought of as a search of the tree.
There are two ways to search a tree: depth-first and breadth-first, and these
correspond to the two matching algorithms provided by PCRE.
.
.SH "THE STANDARD MATCHING ALGORITHM"
.rs
.sp
In the terminology of Jeffrey Friedl's book "Mastering Regular
Expressions", the standard algorithm is an "NFA algorithm". It conducts a
depth-first search of the pattern tree. That is, it proceeds along a single
path through the tree, checking that the subject matches what is required. When
there is a mismatch, the algorithm tries any alternatives at the current point,
and if they all fail, it backs up to the previous branch point in the tree, and
tries the next alternative branch at that level. This often involves backing up
(moving to the left) in the subject string as well. The order in which
repetition branches are tried is controlled by the greedy or ungreedy nature of
the quantifier.
.P
If a leaf node is reached, a matching string has been found, and at that point
the algorithm stops. Thus, if there is more than one possible match, this
algorithm returns the first one that it finds. Whether this is the shortest,
the longest, or some intermediate length depends on the way the greedy and
ungreedy repetition quantifiers are specified in the pattern.
.P
Because it ends up with a single path through the tree, it is relatively
straightforward for this algorithm to keep track of the substrings that are
matched by portions of the pattern in parentheses. This provides support for
capturing parentheses and back references.
.
.SH "THE ALTERNATIVE MATCHING ALGORITHM"
.rs
.sp
This algorithm conducts a breadth-first search of the tree. Starting from the
first matching point in the subject, it scans the subject string from left to
right, once, character by character, and as it does this, it remembers all the
paths through the tree that represent valid matches. In Friedl's terminology,
this is a kind of "DFA algorithm", though it is not implemented as a
traditional finite state machine (it keeps multiple states active
simultaneously).
.P
Although the general principle of this matching algorithm is that it scans the
subject string only once, without backtracking, there is one exception: when a
lookaround assertion is encountered, the characters following or preceding the
current point have to be independently inspected.
.P
The scan continues until either the end of the subject is reached, or there are
no more unterminated paths. At this point, terminated paths represent the
different matching possibilities (if there are none, the match has failed).
Thus, if there is more than one possible match, this algorithm finds all of
them, and in particular, it finds the longest. There is an option to stop the
algorithm after the first match (which is necessarily the shortest) is found.
.P
Note that all the matches that are found start at the same point in the
subject. If the pattern
.sp
  cat(er(pillar)?)?
.sp
is matched against the string "the caterpillar catchment", the result will be
the three strings "cat", "cater", and "caterpillar" that start at the fourth
character of the subject. The algorithm does not automatically move on to find
matches that start at later positions.
.P
There are a number of features of PCRE regular expressions that are not
supported by the alternative matching algorithm. They are as follows:
.P
1. Because the algorithm finds all possible matches, the greedy or ungreedy
nature of repetition quantifiers is not relevant. Greedy and ungreedy
quantifiers are treated in exactly the same way. However, possessive
quantifiers can make a difference when what follows could also match what is
quantified, for example in a pattern like this:
.sp
  ^a++\ew!
.sp
This pattern matches "aaab!" but not "aaa!", which would be matched by a
non-possessive quantifier. Similarly, if an atomic group is present, it is
matched as if it were a standalone pattern at the current point, and the
longest match is then "locked in" for the rest of the overall pattern.
.P
2. When dealing with multiple paths through the tree simultaneously, it is not
straightforward to keep track of captured substrings for the different matching
possibilities, and PCRE's implementation of this algorithm does not attempt to
do this. This means that no captured substrings are available.
.P
3. Because no substrings are captured, back references within the pattern are
not supported, and cause errors if encountered.
.P
4. For the same reason, conditional expressions that use a back reference as the
condition or test for a specific group recursion are not supported.
.P
5. Because many paths through the tree may be active, the \eK escape sequence,
which resets the start of the match when encountered (but may be on some paths
and not on others), is not supported. It causes an error if encountered.
.P
6. Callouts are supported, but the value of the \fIcapture_top\fP field is
always 1, and the value of the \fIcapture_last\fP field is always -1.
.P
7. The \eC escape sequence, which (in the standard algorithm) matches a single
byte, even in UTF-8 mode, is not supported because the alternative algorithm
moves through the subject string one character at a time, for all active paths
through the tree.
.P
8. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not
supported. (*FAIL) is supported, and behaves like a failing negative assertion.
.
.SH "ADVANTAGES OF THE ALTERNATIVE ALGORITHM"
.rs
.sp
Using the alternative matching algorithm provides the following advantages:
.P
1. All possible matches (at a single point in the subject) are automatically
found, and in particular, the longest match is found. To find more than one
match using the standard algorithm, you have to do kludgy things with
callouts.
.P
2. Because the alternative algorithm scans the subject string just once, and
never needs to backtrack, it is possible to pass very long subject strings to
the matching function in several pieces, checking for partial matching each
time. The
.\" HREF
\fBpcrepartial\fP
.\"
documentation gives details of partial matching.
.
.
.SH "DISADVANTAGES OF THE ALTERNATIVE ALGORITHM"
.rs
.sp
The alternative algorithm suffers from a number of disadvantages:
.P
1. It is substantially slower than the standard algorithm. This is partly
because it has to search for all possible matches, but is also because it is
less susceptible to optimization.
.P
2. Capturing parentheses and back references are not supported.
.P
3. Although atomic groups are supported, their use does not provide the
performance advantage that it does for the standard algorithm.
.
.
.SH AUTHOR
.rs
.sp
.nf
Philip Hazel
University Computing Service
Cambridge CB2 3QH, England.
.fi
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 29 September 2009
Copyright (c) 1997-2009 University of Cambridge.
.fi
usr/share/man/man3/pcre_exec.3000064400000006326150403561440012122 0ustar00.TH PCRE_EXEC 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
.ti +5n
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
.
.SH DESCRIPTION
.rs
.sp
This function matches a compiled regular expression against a given subject
string, using a matching algorithm that is similar to Perl's. It returns
offsets to captured substrings. Its arguments are:
.sp
  \fIcode\fP         Points to the compiled pattern
  \fIextra\fP        Points to an associated \fBpcre_extra\fP structure,
                 or is NULL
  \fIsubject\fP      Points to the subject string
  \fIlength\fP       Length of the subject string, in bytes
  \fIstartoffset\fP  Offset in bytes in the subject at which to
                 start matching
  \fIoptions\fP      Option bits
  \fIovector\fP      Points to a vector of ints for result offsets
  \fIovecsize\fP     Number of elements in the vector (a multiple of 3)
.sp
The options are:
.sp
  PCRE_ANCHORED          Match only at the first position
  PCRE_BSR_ANYCRLF       \eR matches only CR, LF, or CRLF
  PCRE_BSR_UNICODE       \eR matches all Unicode line endings
  PCRE_NEWLINE_ANY       Recognize any Unicode newline sequence
  PCRE_NEWLINE_ANYCRLF   Recognize CR, LF, & CRLF as newline sequences
  PCRE_NEWLINE_CR        Recognize CR as the only newline sequence
  PCRE_NEWLINE_CRLF      Recognize CRLF as the only newline sequence
  PCRE_NEWLINE_LF        Recognize LF as the only newline sequence
  PCRE_NOTBOL            Subject string is not the beginning of a line
  PCRE_NOTEOL            Subject string is not the end of a line
  PCRE_NOTEMPTY          An empty string is not a valid match
  PCRE_NOTEMPTY_ATSTART  An empty string at the start of the subject
                           is not a valid match
  PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
  PCRE_NO_UTF8_CHECK     Do not check the subject for UTF-8
                           validity (only relevant if PCRE_UTF8
                           was set at compile time)
  PCRE_PARTIAL           ) Return PCRE_ERROR_PARTIAL for a partial
  PCRE_PARTIAL_SOFT      )   match if no full matches are found
  PCRE_PARTIAL_HARD      Return PCRE_ERROR_PARTIAL for a partial match
                           even if there is a full match as well
.sp
For details of partial matching, see the
.\" HREF
\fBpcrepartial\fP
.\"
page. A \fBpcre_extra\fP structure contains the following fields:
.sp
  \fIflags\fP        Bits indicating which fields are set
  \fIstudy_data\fP   Opaque data from \fBpcre_study()\fP
  \fImatch_limit\fP  Limit on internal resource use
  \fImatch_limit_recursion\fP  Limit on internal recursion depth
  \fIcallout_data\fP Opaque data passed back to callouts
  \fItables\fP       Points to character tables or is NULL
.sp
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, and
PCRE_EXTRA_TABLES.
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fP
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fP
.\"
page.
usr/share/man/man3/pcre_free_substring_list.3000064400000001101150403561440015234 0ustar00.TH PCRE_FREE_SUBSTRING_LIST 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B void pcre_free_substring_list(const char **\fIstringptr\fP);
.
.SH DESCRIPTION
.rs
.sp
This is a convenience function for freeing the store obtained by a previous
call to \fBpcre_get_substring_list()\fP. Its only argument is a pointer to the
list of string pointers.
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fP
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fP
.\"
page.
usr/share/man/man3/pcre_config.3000064400000003752150403561440012443 0ustar00.TH PCRE_CONFIG 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B int pcre_config(int \fIwhat\fP, void *\fIwhere\fP);
.
.SH DESCRIPTION
.rs
.sp
This function makes it possible for a client program to find out which optional
features are available in the version of the PCRE library it is using. Its
arguments are as follows:
.sp
  \fIwhat\fR     A code specifying what information is required
  \fIwhere\fR    Points to where to put the data
.sp
The available codes are:
.sp
  PCRE_CONFIG_LINK_SIZE     Internal link size: 2, 3, or 4
  PCRE_CONFIG_MATCH_LIMIT   Internal resource limit
  PCRE_CONFIG_MATCH_LIMIT_RECURSION
                            Internal recursion depth limit
  PCRE_CONFIG_NEWLINE       Value of the default newline sequence:
                                13 (0x000d)    for CR
                                10 (0x000a)    for LF
                              3338 (0x0d0a)    for CRLF
                                -2             for ANYCRLF
                                -1             for ANY
  PCRE_CONFIG_BSR           Indicates what \eR matches by default:
                                 0             all Unicode line endings
                                 1             CR, LF, or CRLF only
  PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
                            Threshold of return slots, above
                              which \fBmalloc()\fR is used by
                              the POSIX API
  PCRE_CONFIG_STACKRECURSE  Recursion implementation (1=stack 0=heap)
  PCRE_CONFIG_UTF8          Availability of UTF-8 support (1=yes 0=no)
  PCRE_CONFIG_UNICODE_PROPERTIES
                            Availability of Unicode property support
                              (1=yes 0=no)
.sp
The function yields 0 on success or PCRE_ERROR_BADOPTION otherwise.
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fR
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fR
.\"
page.
usr/share/man/man3/pcreapi.3000064400000257605150403561440011620 0ustar00.TH PCREAPI 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PCRE NATIVE API"
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.PP
.B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B int *\fIerrorcodeptr\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.PP
.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP);
.PP
.B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
.ti +5n
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
.PP
.B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
.ti +5n
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
.ti +5n
.B int *\fIworkspace\fP, int \fIwscount\fP);
.PP
.B int pcre_copy_named_substring(const pcre *\fIcode\fP,
.ti +5n
.B const char *\fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, const char *\fIstringname\fP,
.ti +5n
.B char *\fIbuffer\fP, int \fIbuffersize\fP);
.PP
.B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,
.ti +5n
.B int \fIbuffersize\fP);
.PP
.B int pcre_get_named_substring(const pcre *\fIcode\fP,
.ti +5n
.B const char *\fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, const char *\fIstringname\fP,
.ti +5n
.B const char **\fIstringptr\fP);
.PP
.B int pcre_get_stringnumber(const pcre *\fIcode\fP,
.ti +5n
.B const char *\fIname\fP);
.PP
.B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
.ti +5n
.B const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);
.PP
.B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP,
.ti +5n
.B const char **\fIstringptr\fP);
.PP
.B int pcre_get_substring_list(const char *\fIsubject\fP,
.ti +5n
.B int *\fIovector\fP, int \fIstringcount\fP, "const char ***\fIlistptr\fP);"
.PP
.B void pcre_free_substring(const char *\fIstringptr\fP);
.PP
.B void pcre_free_substring_list(const char **\fIstringptr\fP);
.PP
.B const unsigned char *pcre_maketables(void);
.PP
.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
.ti +5n
.B int \fIwhat\fP, void *\fIwhere\fP);
.PP
.B int pcre_info(const pcre *\fIcode\fP, int *\fIoptptr\fP, int
.B *\fIfirstcharptr\fP);
.PP
.B int pcre_refcount(pcre *\fIcode\fP, int \fIadjust\fP);
.PP
.B int pcre_config(int \fIwhat\fP, void *\fIwhere\fP);
.PP
.B char *pcre_version(void);
.PP
.B void *(*pcre_malloc)(size_t);
.PP
.B void (*pcre_free)(void *);
.PP
.B void *(*pcre_stack_malloc)(size_t);
.PP
.B void (*pcre_stack_free)(void *);
.PP
.B int (*pcre_callout)(pcre_callout_block *);
.
.
.SH "PCRE API OVERVIEW"
.rs
.sp
PCRE has its own native API, which is described in this document. There are
also some wrapper functions that correspond to the POSIX regular expression
API. These are described in the
.\" HREF
\fBpcreposix\fP
.\"
documentation. Both of these APIs define a set of C function calls. A C++
wrapper is distributed with PCRE. It is documented in the
.\" HREF
\fBpcrecpp\fP
.\"
page.
.P
The native API C function prototypes are defined in the header file
\fBpcre.h\fP, and on Unix systems the library itself is called \fBlibpcre\fP.
It can normally be accessed by adding \fB-lpcre\fP to the command for linking
an application that uses PCRE. The header file defines the macros PCRE_MAJOR
and PCRE_MINOR to contain the major and minor release numbers for the library.
Applications can use these to include support for different releases of PCRE.
.P
The functions \fBpcre_compile()\fP, \fBpcre_compile2()\fP, \fBpcre_study()\fP,
and \fBpcre_exec()\fP are used for compiling and matching regular expressions
in a Perl-compatible manner. A sample program that demonstrates the simplest
way of using them is provided in the file called \fIpcredemo.c\fP in the PCRE
source distribution. A listing of this program is given in the
.\" HREF
\fBpcredemo\fP
.\"
documentation, and the
.\" HREF
\fBpcresample\fP
.\"
documentation describes how to compile and run it.
.P
A second matching function, \fBpcre_dfa_exec()\fP, which is not
Perl-compatible, is also provided. This uses a different algorithm for the
matching. The alternative algorithm finds all possible matches (at a given
point in the subject), and scans the subject just once (unless there are
lookbehind assertions). However, this algorithm does not return captured
substrings. A description of the two matching algorithms and their advantages
and disadvantages is given in the
.\" HREF
\fBpcrematching\fP
.\"
documentation.
.P
In addition to the main compiling and matching functions, there are convenience
functions for extracting captured substrings from a subject string that is
matched by \fBpcre_exec()\fP. They are:
.sp
  \fBpcre_copy_substring()\fP
  \fBpcre_copy_named_substring()\fP
  \fBpcre_get_substring()\fP
  \fBpcre_get_named_substring()\fP
  \fBpcre_get_substring_list()\fP
  \fBpcre_get_stringnumber()\fP
  \fBpcre_get_stringtable_entries()\fP
.sp
\fBpcre_free_substring()\fP and \fBpcre_free_substring_list()\fP are also
provided, to free the memory used for extracted strings.
.P
The function \fBpcre_maketables()\fP is used to build a set of character tables
in the current locale for passing to \fBpcre_compile()\fP, \fBpcre_exec()\fP,
or \fBpcre_dfa_exec()\fP. This is an optional facility that is provided for
specialist use. Most commonly, no special tables are passed, in which case
internal tables that are generated when PCRE is built are used.
.P
The function \fBpcre_fullinfo()\fP is used to find out information about a
compiled pattern; \fBpcre_info()\fP is an obsolete version that returns only
some of the available information, but is retained for backwards compatibility.
The function \fBpcre_version()\fP returns a pointer to a string containing the
version of PCRE and its date of release.
.P
The function \fBpcre_refcount()\fP maintains a reference count in a data block
containing a compiled pattern. This is provided for the benefit of
object-oriented applications.
.P
The global variables \fBpcre_malloc\fP and \fBpcre_free\fP initially contain
the entry points of the standard \fBmalloc()\fP and \fBfree()\fP functions,
respectively. PCRE calls the memory management functions via these variables,
so a calling program can replace them if it wishes to intercept the calls. This
should be done before calling any PCRE functions.
.P
The global variables \fBpcre_stack_malloc\fP and \fBpcre_stack_free\fP are also
indirections to memory management functions. These special functions are used
only when PCRE is compiled to use the heap for remembering data, instead of
recursive function calls, when running the \fBpcre_exec()\fP function. See the
.\" HREF
\fBpcrebuild\fP
.\"
documentation for details of how to do this. It is a non-standard way of
building PCRE, for use in environments that have limited stacks. Because of the
greater use of memory management, it runs more slowly. Separate functions are
provided so that special-purpose external code can be used for this case. When
used, these functions are always called in a stack-like manner (last obtained,
first freed), and always for memory blocks of the same size. There is a
discussion about PCRE's stack usage in the
.\" HREF
\fBpcrestack\fP
.\"
documentation.
.P
The global variable \fBpcre_callout\fP initially contains NULL. It can be set
by the caller to a "callout" function, which PCRE will then call at specified
points during a matching operation. Details are given in the
.\" HREF
\fBpcrecallout\fP
.\"
documentation.
.
.
.\" HTML <a name="newlines"></a>
.SH NEWLINES
.rs
.sp
PCRE supports five different conventions for indicating line breaks in
strings: a single CR (carriage return) character, a single LF (linefeed)
character, the two-character sequence CRLF, any of the three preceding, or any
Unicode newline sequence. The Unicode newline sequences are the three just
mentioned, plus the single characters VT (vertical tab, U+000B), FF (formfeed,
U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS
(paragraph separator, U+2029).
.P
Each of the first three conventions is used by at least one operating system as
its standard newline sequence. When PCRE is built, a default can be specified.
If none is specified, LF is used, which is the Unix standard. When PCRE is run, the
default can be overridden, either when a pattern is compiled, or when it is
matched.
.P
At compile time, the newline convention can be specified by the \fIoptions\fP
argument of \fBpcre_compile()\fP, or it can be specified by special text at the
start of the pattern itself; this overrides any other settings. See the
.\" HREF
\fBpcrepattern\fP
.\"
page for details of the special character sequences.
.P
In the PCRE documentation the word "newline" is used to mean "the character or
pair of characters that indicate a line break". The choice of newline
convention affects the handling of the dot, circumflex, and dollar
metacharacters, the handling of #-comments in /x mode, and, when CRLF is a
recognized line ending sequence, the match position advancement for a
non-anchored pattern. There is more detail about this in the
.\" HTML <a href="#execoptions">
.\" </a>
section on \fBpcre_exec()\fP options
.\"
below.
.P
The choice of newline convention does not affect the interpretation of
the \en or \er escape sequences, nor does it affect what \eR matches, which is
controlled in a similar way, but by separate options.
.
.
.SH MULTITHREADING
.rs
.sp
The PCRE functions can be used in multi-threading applications, with the
proviso that the memory management functions pointed to by \fBpcre_malloc\fP,
\fBpcre_free\fP, \fBpcre_stack_malloc\fP, and \fBpcre_stack_free\fP, and the
callout function pointed to by \fBpcre_callout\fP, are shared by all threads.
.P
The compiled form of a regular expression is not altered during matching, so
the same compiled pattern can safely be used by several threads at once.
.
.
.SH "SAVING PRECOMPILED PATTERNS FOR LATER USE"
.rs
.sp
The compiled form of a regular expression can be saved and re-used at a later
time, possibly by a different program, and even on a host other than the one on
which it was compiled. Details are given in the
.\" HREF
\fBpcreprecompile\fP
.\"
documentation. However, compiling a regular expression with one version of PCRE
for use with a different version is not guaranteed to work and may cause
crashes.
.
.
.SH "CHECKING BUILD-TIME OPTIONS"
.rs
.sp
.B int pcre_config(int \fIwhat\fP, void *\fIwhere\fP);
.PP
The function \fBpcre_config()\fP makes it possible for a PCRE client to
discover which optional features have been compiled into the PCRE library. The
.\" HREF
\fBpcrebuild\fP
.\"
documentation has more details about these optional features.
.P
The first argument for \fBpcre_config()\fP is an integer, specifying which
information is required; the second argument is a pointer to a variable into
which the information is placed. The following information is available:
.sp
  PCRE_CONFIG_UTF8
.sp
The output is an integer that is set to one if UTF-8 support is available;
otherwise it is set to zero.
.sp
  PCRE_CONFIG_UNICODE_PROPERTIES
.sp
The output is an integer that is set to one if support for Unicode character
properties is available; otherwise it is set to zero.
.sp
  PCRE_CONFIG_NEWLINE
.sp
The output is an integer whose value specifies the default character sequence
that is recognized as meaning "newline". The five values that are supported
are: 10 for LF, 13 for CR, 3338 for CRLF, -2 for ANYCRLF, and -1 for ANY.
Though they are derived from ASCII, the same values are returned in EBCDIC
environments. The default should normally correspond to the standard sequence
for your operating system.
.sp
  PCRE_CONFIG_BSR
.sp
The output is an integer whose value indicates what character sequences the \eR
escape sequence matches by default. A value of 0 means that \eR matches any
Unicode line ending sequence; a value of 1 means that \eR matches only CR, LF,
or CRLF. The default can be overridden when a pattern is compiled or matched.
.sp
  PCRE_CONFIG_LINK_SIZE
.sp
The output is an integer that contains the number of bytes used for internal
linkage in compiled regular expressions. The value is 2, 3, or 4. Larger values
allow larger regular expressions to be compiled, at the expense of slower
matching. The default value of 2 is sufficient for all but the most massive
patterns, since it allows the compiled pattern to be up to 64K in size.
.sp
  PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
.sp
The output is an integer that contains the threshold above which the POSIX
interface uses \fBmalloc()\fP for output vectors. Further details are given in
the
.\" HREF
\fBpcreposix\fP
.\"
documentation.
.sp
  PCRE_CONFIG_MATCH_LIMIT
.sp
The output is a long integer that gives the default limit for the number of
internal matching function calls in a \fBpcre_exec()\fP execution. Further
details are given with \fBpcre_exec()\fP below.
.sp
  PCRE_CONFIG_MATCH_LIMIT_RECURSION
.sp
The output is a long integer that gives the default limit for the depth of
recursion when calling the internal matching function in a \fBpcre_exec()\fP
execution. Further details are given with \fBpcre_exec()\fP below.
.sp
  PCRE_CONFIG_STACKRECURSE
.sp
The output is an integer that is set to one if internal recursion when running
\fBpcre_exec()\fP is implemented by recursive function calls that use the stack
to remember their state. This is the usual way that PCRE is compiled. The
output is zero if PCRE was compiled to use blocks of data on the heap instead
of recursive function calls. In this case, \fBpcre_stack_malloc\fP and
\fBpcre_stack_free\fP are called to manage memory blocks on the heap, thus
avoiding the use of the stack.
.
.
.SH "COMPILING A PATTERN"
.rs
.sp
.B pcre *pcre_compile(const char *\fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.sp
.B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B int *\fIerrorcodeptr\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.P
Either of the functions \fBpcre_compile()\fP or \fBpcre_compile2()\fP can be
called to compile a pattern into an internal form. The only difference between
the two interfaces is that \fBpcre_compile2()\fP has an additional argument,
\fIerrorcodeptr\fP, via which a numerical error code can be returned. To avoid
too much repetition, we refer just to \fBpcre_compile()\fP below, but the
information applies equally to \fBpcre_compile2()\fP.
.P
The pattern is a C string terminated by a binary zero, and is passed in the
\fIpattern\fP argument. A pointer to a single block of memory that is obtained
via \fBpcre_malloc\fP is returned. This contains the compiled code and related
data. The \fBpcre\fP type is defined for the returned block; this is a typedef
for a structure whose contents are not externally defined. It is up to the
caller to free the memory (via \fBpcre_free\fP) when it is no longer required.
.P
Although the compiled code of a PCRE regex is relocatable, that is, it does not
depend on memory location, the complete \fBpcre\fP data block is not
fully relocatable, because it may contain a copy of the \fItableptr\fP
argument, which is an address (see below).
.P
The \fIoptions\fP argument contains various bit settings that affect the
compilation. It should be zero if no options are required. The available
options are described below. Some of them (in particular, those that are
compatible with Perl, but some others as well) can also be set and unset from
within the pattern (see the detailed description in the
.\" HREF
\fBpcrepattern\fP
.\"
documentation). For those options that can be different in different parts of
the pattern, the contents of the \fIoptions\fP argument specifies their
settings at the start of compilation and execution. The PCRE_ANCHORED,
PCRE_BSR_\fIxxx\fP, and PCRE_NEWLINE_\fIxxx\fP options can be set at the time
of matching as well as at compile time.
.P
If \fIerrptr\fP is NULL, \fBpcre_compile()\fP returns NULL immediately.
Otherwise, if compilation of a pattern fails, \fBpcre_compile()\fP returns
NULL, and sets the variable pointed to by \fIerrptr\fP to point to a textual
error message. This is a static string that is part of the library. You must
not try to free it. The byte offset from the start of the pattern to the
character that was being processed when the error was discovered is placed in
the variable pointed to by \fIerroffset\fP, which must not be NULL. If it is,
an immediate error is given. Some errors are not detected until checks are
carried out when the whole pattern has been scanned; in this case the offset is
set to the end of the pattern.
.P
If \fBpcre_compile2()\fP is used instead of \fBpcre_compile()\fP, and the
\fIerrorcodeptr\fP argument is not NULL, a non-zero error code number is
returned via this argument in the event of an error. This is in addition to the
textual error message. Error codes and messages are listed below.
.P
If the final argument, \fItableptr\fP, is NULL, PCRE uses a default set of
character tables that are built when PCRE is compiled, using the default C
locale. Otherwise, \fItableptr\fP must be an address that is the result of a
call to \fBpcre_maketables()\fP. This value is stored with the compiled
pattern, and used again by \fBpcre_exec()\fP, unless another table pointer is
passed to it. For more discussion, see the section on locale support below.
.P
This code fragment shows a typical straightforward call to \fBpcre_compile()\fP:
.sp
  pcre *re;
  const char *error;
  int erroffset;
  re = pcre_compile(
    "^A.*Z",          /* the pattern */
    0,                /* default options */
    &error,           /* for error message */
    &erroffset,       /* for error offset */
    NULL);            /* use default character tables */
.sp
The following names for option bits are defined in the \fBpcre.h\fP header
file:
.sp
  PCRE_ANCHORED
.sp
If this bit is set, the pattern is forced to be "anchored", that is, it is
constrained to match only at the first matching point in the string that is
being searched (the "subject string"). This effect can also be achieved by
appropriate constructs in the pattern itself, which is the only way to do it in
Perl.
.sp
  PCRE_AUTO_CALLOUT
.sp
If this bit is set, \fBpcre_compile()\fP automatically inserts callout items,
all with number 255, before each pattern item. For discussion of the callout
facility, see the
.\" HREF
\fBpcrecallout\fP
.\"
documentation.
.sp
  PCRE_BSR_ANYCRLF
  PCRE_BSR_UNICODE
.sp
These options (which are mutually exclusive) control what the \eR escape
sequence matches. The choice is either to match only CR, LF, or CRLF, or to
match any Unicode newline sequence. The default is specified when PCRE is
built. It can be overridden from within the pattern, or by setting an option
when a compiled pattern is matched.
.sp
  PCRE_CASELESS
.sp
If this bit is set, letters in the pattern match both upper and lower case
letters. It is equivalent to Perl's /i option, and it can be changed within a
pattern by a (?i) option setting. In UTF-8 mode, PCRE always understands the
concept of case for characters whose values are less than 128, so caseless
matching is always possible. For characters with higher values, the concept of
case is supported if PCRE is compiled with Unicode property support, but not
otherwise. If you want to use caseless matching for characters 128 and above,
you must ensure that PCRE is compiled with Unicode property support as well as
with UTF-8 support.
.sp
  PCRE_DOLLAR_ENDONLY
.sp
If this bit is set, a dollar metacharacter in the pattern matches only at the
end of the subject string. Without this option, a dollar also matches
immediately before a newline at the end of the string (but not before any other
newlines). The PCRE_DOLLAR_ENDONLY option is ignored if PCRE_MULTILINE is set.
There is no equivalent to this option in Perl, and no way to set it within a
pattern.
.sp
  PCRE_DOTALL
.sp
If this bit is set, a dot metacharacter in the pattern matches all characters,
including those that indicate newline. Without it, a dot does not match when
the current position is at a newline. This option is equivalent to Perl's /s
option, and it can be changed within a pattern by a (?s) option setting. A
negative class such as [^a] always matches newline characters, independent of
the setting of this option.
.sp
  PCRE_DUPNAMES
.sp
If this bit is set, names used to identify capturing subpatterns need not be
unique. This can be helpful for certain types of pattern when it is known that
only one instance of the named subpattern can ever be matched. There are more
details of named subpatterns below; see also the
.\" HREF
\fBpcrepattern\fP
.\"
documentation.
.sp
  PCRE_EXTENDED
.sp
If this bit is set, whitespace data characters in the pattern are totally
ignored except when escaped or inside a character class. Whitespace does not
include the VT character (code 11). In addition, characters between an
unescaped # outside a character class and the next newline, inclusive, are also
ignored. This is equivalent to Perl's /x option, and it can be changed within a
pattern by a (?x) option setting.
.P
This option makes it possible to include comments inside complicated patterns.
Note, however, that this applies only to data characters. Whitespace characters
may never appear within special character sequences in a pattern, for example
within the sequence (?( which introduces a conditional subpattern.
.sp
  PCRE_EXTRA
.sp
This option was invented in order to turn on additional functionality of PCRE
that is incompatible with Perl, but it is currently of very little use. When
set, any backslash in a pattern that is followed by a letter that has no
special meaning causes an error, thus reserving these combinations for future
expansion. By default, as in Perl, a backslash followed by a letter with no
special meaning is treated as a literal. (Perl can, however, be persuaded to
give a warning for this.) There are at present no other features controlled by
this option. It can also be set by a (?X) option setting within a pattern.
.sp
  PCRE_FIRSTLINE
.sp
If this option is set, an unanchored pattern is required to match before or at
the first newline in the subject string, though the matched text may continue
over the newline.
.sp
  PCRE_JAVASCRIPT_COMPAT
.sp
If this option is set, PCRE's behaviour is changed in some ways so that it is
compatible with JavaScript rather than Perl. The changes are as follows:
.P
(1) A lone closing square bracket in a pattern causes a compile-time error,
because this is illegal in JavaScript (by default it is treated as a data
character). Thus, the pattern AB]CD becomes illegal when this option is set.
.P
(2) At run time, a back reference to an unset subpattern group matches an empty
string (by default this causes the current matching alternative to fail). A
pattern such as (\e1)(a) succeeds when this option is set (assuming it can find
an "a" in the subject), whereas it fails by default, for Perl compatibility.
.sp
  PCRE_MULTILINE
.sp
By default, PCRE treats the subject string as consisting of a single line of
characters (even if it actually contains newlines). The "start of line"
metacharacter (^) matches only at the start of the string, while the "end of
line" metacharacter ($) matches only at the end of the string, or before a
terminating newline (unless PCRE_DOLLAR_ENDONLY is set). This is the same as
Perl.
.P
When PCRE_MULTILINE is set, the "start of line" and "end of line" constructs
match immediately following or immediately before internal newlines in the
subject string, respectively, as well as at the very start and end. This is
equivalent to Perl's /m option, and it can be changed within a pattern by a
(?m) option setting. If there are no newlines in a subject string, or no
occurrences of ^ or $ in a pattern, setting PCRE_MULTILINE has no effect.
.sp
  PCRE_NEWLINE_CR
  PCRE_NEWLINE_LF
  PCRE_NEWLINE_CRLF
  PCRE_NEWLINE_ANYCRLF
  PCRE_NEWLINE_ANY
.sp
These options override the default newline definition that was chosen when PCRE
was built. Setting the first or the second specifies that a newline is
indicated by a single character (CR or LF, respectively). Setting
PCRE_NEWLINE_CRLF specifies that a newline is indicated by the two-character
CRLF sequence. Setting PCRE_NEWLINE_ANYCRLF specifies that any of the three
preceding sequences should be recognized. Setting PCRE_NEWLINE_ANY specifies
that any Unicode newline sequence should be recognized. The Unicode newline
sequences are the three just mentioned, plus the single characters VT (vertical
tab, U+000B), FF (formfeed, U+000C), NEL (next line, U+0085), LS (line
separator, U+2028), and PS (paragraph separator, U+2029). The last two are
recognized only in UTF-8 mode.
.P
The newline setting in the options word uses three bits that are treated
as a number, giving eight possibilities. Currently only six are used (default
plus the five values above). This means that if you set more than one newline
option, the combination may or may not be sensible. For example,
PCRE_NEWLINE_CR with PCRE_NEWLINE_LF is equivalent to PCRE_NEWLINE_CRLF, but
other combinations may yield unused numbers and cause an error.
.P
The only time that a line break is specially recognized when compiling a
pattern is if PCRE_EXTENDED is set, and an unescaped # outside a character
class is encountered. This indicates a comment that lasts until after the next
line break sequence. In other circumstances, line break sequences are treated
as literal data, except that in PCRE_EXTENDED mode, both CR and LF are treated
as whitespace characters and are therefore ignored.
.P
The newline option that is set at compile time becomes the default that is used
for \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP, but it can be overridden.
.sp
  PCRE_NO_AUTO_CAPTURE
.sp
If this option is set, it disables the use of numbered capturing parentheses in
the pattern. Any opening parenthesis that is not followed by ? behaves as if it
were followed by ?: but named parentheses can still be used for capturing (and
they acquire numbers in the usual way). There is no equivalent of this option
in Perl.
.sp
  PCRE_UNGREEDY
.sp
This option inverts the "greediness" of the quantifiers so that they are not
greedy by default, but become greedy if followed by "?". It is not compatible
with Perl. It can also be set by a (?U) option setting within the pattern.
.sp
  PCRE_UTF8
.sp
This option causes PCRE to regard both the pattern and the subject as strings
of UTF-8 characters instead of single-byte character strings. However, it is
available only when PCRE is built to include UTF-8 support. If not, the use
of this option provokes an error. Details of how this option changes the
behaviour of PCRE are given in the
.\" HTML <a href="pcre.html#utf8support">
.\" </a>
section on UTF-8 support
.\"
in the main
.\" HREF
\fBpcre\fP
.\"
page.
.sp
  PCRE_NO_UTF8_CHECK
.sp
When PCRE_UTF8 is set, the validity of the pattern as a UTF-8 string is
automatically checked. There is a discussion about the
.\" HTML <a href="pcre.html#utf8strings">
.\" </a>
validity of UTF-8 strings
.\"
in the main
.\" HREF
\fBpcre\fP
.\"
page. If an invalid UTF-8 sequence of bytes is found, \fBpcre_compile()\fP
returns an error. If you already know that your pattern is valid, and you want
to skip this check for performance reasons, you can set the PCRE_NO_UTF8_CHECK
option. When it is set, the effect of passing an invalid UTF-8 string as a
pattern is undefined. It may cause your program to crash. Note that this option
can also be passed to \fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP, to suppress
the UTF-8 validity checking of subject strings.
.
.
.SH "COMPILATION ERROR CODES"
.rs
.sp
The following table lists the error codes that may be returned by
\fBpcre_compile2()\fP, along with the error messages that may be returned by
both compiling functions. As PCRE has developed, some error codes have fallen
out of use. To avoid confusion, they have not been re-used.
.sp
   0  no error
   1  \e at end of pattern
   2  \ec at end of pattern
   3  unrecognized character follows \e
   4  numbers out of order in {} quantifier
   5  number too big in {} quantifier
   6  missing terminating ] for character class
   7  invalid escape sequence in character class
   8  range out of order in character class
   9  nothing to repeat
  10  [this code is not in use]
  11  internal error: unexpected repeat
  12  unrecognized character after (? or (?-
  13  POSIX named classes are supported only within a class
  14  missing )
  15  reference to non-existent subpattern
  16  erroffset passed as NULL
  17  unknown option bit(s) set
  18  missing ) after comment
  19  [this code is not in use]
  20  regular expression is too large
  21  failed to get memory
  22  unmatched parentheses
  23  internal error: code overflow
  24  unrecognized character after (?<
  25  lookbehind assertion is not fixed length
  26  malformed number or name after (?(
  27  conditional group contains more than two branches
  28  assertion expected after (?(
  29  (?R or (?[+-]digits must be followed by )
  30  unknown POSIX class name
  31  POSIX collating elements are not supported
  32  this version of PCRE is not compiled with PCRE_UTF8 support
  33  [this code is not in use]
  34  character value in \ex{...} sequence is too large
  35  invalid condition (?(0)
  36  \eC not allowed in lookbehind assertion
  37  PCRE does not support \eL, \el, \eN, \eU, or \eu
  38  number after (?C is > 255
  39  closing ) for (?C expected
  40  recursive call could loop indefinitely
  41  unrecognized character after (?P
  42  syntax error in subpattern name (missing terminator)
  43  two named subpatterns have the same name
  44  invalid UTF-8 string
  45  support for \eP, \ep, and \eX has not been compiled
  46  malformed \eP or \ep sequence
  47  unknown property name after \eP or \ep
  48  subpattern name is too long (maximum 32 characters)
  49  too many named subpatterns (maximum 10000)
  50  [this code is not in use]
  51  octal value is greater than \e377 (not in UTF-8 mode)
  52  internal error: overran compiling workspace
  53  internal error: previously-checked referenced subpattern not found
  54  DEFINE group contains more than one branch
  55  repeating a DEFINE group is not allowed
  56  inconsistent NEWLINE options
  57  \eg is not followed by a braced, angle-bracketed, or quoted
        name/number or by a plain number
  58  a numbered reference must not be zero
  59  (*VERB) with an argument is not supported
  60  (*VERB) not recognized
  61  number is too big
  62  subpattern name expected
  63  digit expected after (?+
  64  ] is an invalid data character in JavaScript compatibility mode
.sp
The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may
be used if the limits were changed when PCRE was built.
.
.
.SH "STUDYING A PATTERN"
.rs
.sp
.B pcre_extra *pcre_study(const pcre *\fIcode\fP, int \fIoptions\fP,
.ti +5n
.B const char **\fIerrptr\fP);
.PP
If a compiled pattern is going to be used several times, it is worth spending
more time analyzing it in order to speed up the time taken for matching. The
function \fBpcre_study()\fP takes a pointer to a compiled pattern as its first
argument. If studying the pattern produces additional information that will
help speed up matching, \fBpcre_study()\fP returns a pointer to a
\fBpcre_extra\fP block, in which the \fIstudy_data\fP field points to the
results of the study.
.P
The returned value from \fBpcre_study()\fP can be passed directly to
\fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP. However, a \fBpcre_extra\fP block
also contains other fields that can be set by the caller before the block is
passed; these are described
.\" HTML <a href="#extradata">
.\" </a>
below
.\"
in the section on matching a pattern.
.P
If studying the pattern does not produce any useful information,
\fBpcre_study()\fP returns NULL. In that circumstance, if the calling program
wants to pass any of the other fields to \fBpcre_exec()\fP or
\fBpcre_dfa_exec()\fP, it must set up its own \fBpcre_extra\fP block.
.P
The second argument of \fBpcre_study()\fP contains option bits. At present, no
options are defined, and this argument should always be zero.
.P
The third argument for \fBpcre_study()\fP is a pointer for an error message. If
studying succeeds (even if no data is returned), the variable it points to is
set to NULL. Otherwise it is set to point to a textual error message. This is a
static string that is part of the library. You must not try to free it. You
should test the error pointer for NULL after calling \fBpcre_study()\fP, to be
sure that it has run successfully.
.P
This is a typical call to \fBpcre_study()\fP:
.sp
  pcre_extra *pe;
  pe = pcre_study(
    re,             /* result of pcre_compile() */
    0,              /* no options exist */
    &error);        /* set to NULL or points to a message */
.sp
Studying a pattern does two things: first, a lower bound for the length of
subject string that is needed to match the pattern is computed. This does not
mean that there are any strings of that length that match, but it does
guarantee that no shorter strings match. The value is used by
\fBpcre_exec()\fP and \fBpcre_dfa_exec()\fP to avoid wasting time by trying to
match strings that are shorter than the lower bound. You can find out the value
in a calling program via the \fBpcre_fullinfo()\fP function.
.P
Studying a pattern is also useful for non-anchored patterns that do not have a
single fixed starting character. A bitmap of possible starting bytes is
created. This speeds up finding a position in the subject at which to start
matching.
.
.
.\" HTML <a name="localesupport"></a>
.SH "LOCALE SUPPORT"
.rs
.sp
PCRE handles caseless matching, and determines whether characters are letters,
digits, or whatever, by reference to a set of tables, indexed by character
value. When running in UTF-8 mode, this applies only to characters with codes
less than 128. Higher-valued codes never match escapes such as \ew or \ed, but
can be tested with \ep if PCRE is built with Unicode character property
support. The use of locales with Unicode is discouraged. If you are handling
characters with codes greater than 128, you should either use UTF-8 and
Unicode, or use locales, but not try to mix the two.
.P
PCRE contains an internal set of tables that are used when the final argument
of \fBpcre_compile()\fP is NULL. These are sufficient for many applications.
Normally, the internal tables recognize only ASCII characters. However, when
PCRE is built, it is possible to cause the internal tables to be rebuilt in the
default "C" locale of the local system, which may cause them to be different.
.P
The internal tables can always be overridden by tables supplied by the
application that calls PCRE. These may be created in a different locale from
the default. As more and more applications change to using Unicode, the need
for this locale support is expected to die away.
.P
External tables are built by calling the \fBpcre_maketables()\fP function,
which has no arguments, in the relevant locale. The result can then be passed
to \fBpcre_compile()\fP or \fBpcre_exec()\fP as often as necessary. For
example, to build and use tables that are appropriate for the French locale
(where accented characters with values greater than 128 are treated as letters),
the following code could be used:
.sp
  setlocale(LC_CTYPE, "fr_FR");
  tables = pcre_maketables();
  re = pcre_compile(..., tables);
.sp
The locale name "fr_FR" is used on Linux and other Unix-like systems; if you
are using Windows, the name for the French locale is "french".
.P
When \fBpcre_maketables()\fP runs, the tables are built in memory that is
obtained via \fBpcre_malloc\fP. It is the caller's responsibility to ensure
that the memory containing the tables remains available for as long as it is
needed.
.P
The pointer that is passed to \fBpcre_compile()\fP is saved with the compiled
pattern, and the same tables are used via this pointer by \fBpcre_study()\fP
and normally also by \fBpcre_exec()\fP. Thus, by default, for any single
pattern, compilation, studying and matching all happen in the same locale, but
different patterns can be compiled in different locales.
.P
It is possible to pass a table pointer or NULL (indicating the use of the
internal tables) to \fBpcre_exec()\fP. Although not intended for this purpose,
this facility could be used to match a pattern in a different locale from the
one in which it was compiled. Passing table pointers at run time is discussed
below in the section on matching a pattern.
.
.
.SH "INFORMATION ABOUT A PATTERN"
.rs
.sp
.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
.ti +5n
.B int \fIwhat\fP, void *\fIwhere\fP);
.PP
The \fBpcre_fullinfo()\fP function returns information about a compiled
pattern. It replaces the obsolete \fBpcre_info()\fP function, which is
nevertheless retained for backwards compatibility (and is documented below).
.P
The first argument for \fBpcre_fullinfo()\fP is a pointer to the compiled
pattern. The second argument is the result of \fBpcre_study()\fP, or NULL if
the pattern was not studied. The third argument specifies which piece of
information is required, and the fourth argument is a pointer to a variable
to receive the data. The yield of the function is zero for success, or one of
the following negative numbers:
.sp
  PCRE_ERROR_NULL       the argument \fIcode\fP was NULL
                        the argument \fIwhere\fP was NULL
  PCRE_ERROR_BADMAGIC   the "magic number" was not found
  PCRE_ERROR_BADOPTION  the value of \fIwhat\fP was invalid
.sp
The "magic number" is placed at the start of each compiled pattern as a simple
check against passing an arbitrary memory pointer. Here is a typical call of
\fBpcre_fullinfo()\fP, to obtain the length of the compiled pattern:
.sp
  int rc;
  size_t length;
  rc = pcre_fullinfo(
    re,               /* result of pcre_compile() */
    pe,               /* result of pcre_study(), or NULL */
    PCRE_INFO_SIZE,   /* what is required */
    &length);         /* where to put the data */
.sp
The possible values for the third argument are defined in \fBpcre.h\fP, and are
as follows:
.sp
  PCRE_INFO_BACKREFMAX
.sp
Return the number of the highest back reference in the pattern. The fourth
argument should point to an \fBint\fP variable. Zero is returned if there are
no back references.
.sp
  PCRE_INFO_CAPTURECOUNT
.sp
Return the number of capturing subpatterns in the pattern. The fourth argument
should point to an \fBint\fP variable.
.sp
  PCRE_INFO_DEFAULT_TABLES
.sp
Return a pointer to the internal default character tables within PCRE. The
fourth argument should point to an \fBunsigned char *\fP variable. This
information call is provided for internal use by the \fBpcre_study()\fP
function. External callers can cause PCRE to use its internal tables by passing
a NULL table pointer.
.sp
  PCRE_INFO_FIRSTBYTE
.sp
Return information about the first byte of any matched string, for a
non-anchored pattern. The fourth argument should point to an \fBint\fP
variable. (This option used to be called PCRE_INFO_FIRSTCHAR; the old name is
still recognized for backwards compatibility.)
.P
If there is a fixed first byte, for example, from a pattern such as
(cat|cow|coyote), its value is returned. Otherwise, if either
.sp
(a) the pattern was compiled with the PCRE_MULTILINE option, and every branch
starts with "^", or
.sp
(b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not set
(if it were set, the pattern would be anchored),
.sp
-1 is returned, indicating that the pattern matches only at the start of a
subject string or after any newline within the string. Otherwise -2 is
returned. For anchored patterns, -2 is returned.
.sp
  PCRE_INFO_FIRSTTABLE
.sp
If the pattern was studied, and this resulted in the construction of a 256-bit
table indicating a fixed set of bytes for the first byte in any matching
string, a pointer to the table is returned. Otherwise NULL is returned. The
fourth argument should point to an \fBunsigned char *\fP variable.
.sp
  PCRE_INFO_HASCRORLF
.sp
Return 1 if the pattern contains any explicit matches for CR or LF characters,
otherwise 0. The fourth argument should point to an \fBint\fP variable. An
explicit match is either a literal CR or LF character, or \er or \en.
.sp
  PCRE_INFO_JCHANGED
.sp
Return 1 if the (?J) or (?-J) option setting is used in the pattern, otherwise
0. The fourth argument should point to an \fBint\fP variable. (?J) and
(?-J) set and unset the local PCRE_DUPNAMES option, respectively.
.sp
  PCRE_INFO_LASTLITERAL
.sp
Return the value of the rightmost literal byte that must exist in any matched
string, other than at its start, if such a byte has been recorded. The fourth
argument should point to an \fBint\fP variable. If there is no such byte, -1 is
returned. For anchored patterns, a last literal byte is recorded only if it
follows something of variable length. For example, for the pattern
/^a\ed+z\ed+/ the returned value is "z", but for /^a\edz\ed/ the returned value
is -1.
.sp
  PCRE_INFO_MINLENGTH
.sp
If the pattern was studied and a minimum length for matching subject strings
was computed, its value is returned. Otherwise the returned value is -1. The
value is a number of characters, not bytes (this may be relevant in UTF-8
mode). The fourth argument should point to an \fBint\fP variable. A
non-negative value is a lower bound to the length of any matching string. There
may not be any strings of that length that do actually match, but every string
that does match is at least that long.
.sp
  PCRE_INFO_NAMECOUNT
  PCRE_INFO_NAMEENTRYSIZE
  PCRE_INFO_NAMETABLE
.sp
PCRE supports the use of named as well as numbered capturing parentheses. The
names are just an additional way of identifying the parentheses, which still
acquire numbers. Several convenience functions such as
\fBpcre_get_named_substring()\fP are provided for extracting captured
substrings by name. It is also possible to extract the data directly, by first
converting the name to a number in order to access the correct pointers in the
output vector (described with \fBpcre_exec()\fP below). To do the conversion,
you need to use the name-to-number map, which is described by these three
values.
.P
The map consists of a number of fixed-size entries. PCRE_INFO_NAMECOUNT gives
the number of entries, and PCRE_INFO_NAMEENTRYSIZE gives the size of each
entry; both of these return an \fBint\fP value. The entry size depends on the
length of the longest name. PCRE_INFO_NAMETABLE returns a pointer to the first
entry of the table (a pointer to \fBchar\fP). The first two bytes of each entry
are the number of the capturing parenthesis, most significant byte first. The
rest of the entry is the corresponding name, zero terminated.
.P
The names are in alphabetical order. Duplicate names may appear if (?| is used
to create multiple groups with the same number, as described in the
.\" HTML <a href="pcrepattern.html#dupsubpatternnumber">
.\" </a>
section on duplicate subpattern numbers
.\"
in the
.\" HREF
\fBpcrepattern\fP
.\"
page. Duplicate names for subpatterns with different numbers are permitted only
if PCRE_DUPNAMES is set. In all cases of duplicate names, they appear in the
table in the order in which they were found in the pattern. In the absence of
(?| this is the order of increasing number; when (?| is used this is not
necessarily the case because later subpatterns may have lower numbers.
.P
As a simple example of the name/number table, consider the following pattern
(assume PCRE_EXTENDED is set, so white space - including newlines - is
ignored):
.sp
.\" JOIN
  (?<date> (?<year>(\ed\ed)?\ed\ed) -
  (?<month>\ed\ed) - (?<day>\ed\ed) )
.sp
There are four named subpatterns, so the table has four entries, and each entry
in the table is eight bytes long. The table is as follows, with non-printing
bytes shown in hexadecimal, and undefined bytes shown as ??:
.sp
  00 01 d  a  t  e  00 ??
  00 05 d  a  y  00 ?? ??
  00 04 m  o  n  t  h  00
  00 02 y  e  a  r  00 ??
.sp
When writing code to extract data from named subpatterns using the
name-to-number map, remember that the length of the entries is likely to be
different for each compiled pattern.
.sp
  PCRE_INFO_OKPARTIAL
.sp
Return 1 if the pattern can be used for partial matching with
\fBpcre_exec()\fP, otherwise 0. The fourth argument should point to an
\fBint\fP variable. From release 8.00, this always returns 1, because the
restrictions that previously applied to partial matching have been lifted. The
.\" HREF
\fBpcrepartial\fP
.\"
documentation gives details of partial matching.
.sp
  PCRE_INFO_OPTIONS
.sp
Return a copy of the options with which the pattern was compiled. The fourth
argument should point to an \fBunsigned long int\fP variable. These option bits
are those specified in the call to \fBpcre_compile()\fP, modified by any
top-level option settings at the start of the pattern itself. In other words,
they are the options that will be in force when matching starts. For example,
if the pattern /(?im)abc(?-i)d/ is compiled with the PCRE_EXTENDED option, the
result is PCRE_CASELESS, PCRE_MULTILINE, and PCRE_EXTENDED.
.P
A pattern is automatically anchored by PCRE if all of its top-level
alternatives begin with one of the following:
.sp
  ^     unless PCRE_MULTILINE is set
  \eA    always
  \eG    always
.\" JOIN
  .*    if PCRE_DOTALL is set and there are no back
          references to the subpattern in which .* appears
.sp
For such patterns, the PCRE_ANCHORED bit is set in the options returned by
\fBpcre_fullinfo()\fP.
.sp
  PCRE_INFO_SIZE
.sp
Return the size of the compiled pattern, that is, the value that was passed as
the argument to \fBpcre_malloc()\fP when PCRE was getting memory in which to
place the compiled data. The fourth argument should point to a \fBsize_t\fP
variable.
.sp
  PCRE_INFO_STUDYSIZE
.sp
Return the size of the data block pointed to by the \fIstudy_data\fP field in
a \fBpcre_extra\fP block. That is, it is the value that was passed to
\fBpcre_malloc()\fP when PCRE was getting memory into which to place the data
created by \fBpcre_study()\fP. If \fBpcre_extra\fP is NULL, or there is no
study data, zero is returned. The fourth argument should point to a
\fBsize_t\fP variable.
.
.
.SH "OBSOLETE INFO FUNCTION"
.rs
.sp
.B int pcre_info(const pcre *\fIcode\fP, int *\fIoptptr\fP, int
.B *\fIfirstcharptr\fP);
.PP
The \fBpcre_info()\fP function is now obsolete because its interface is too
restrictive to return all the available data about a compiled pattern. New
programs should use \fBpcre_fullinfo()\fP instead. The yield of
\fBpcre_info()\fP is the number of capturing subpatterns, or one of the
following negative numbers:
.sp
  PCRE_ERROR_NULL       the argument \fIcode\fP was NULL
  PCRE_ERROR_BADMAGIC   the "magic number" was not found
.sp
If the \fIoptptr\fP argument is not NULL, a copy of the options with which the
pattern was compiled is placed in the integer it points to (see
PCRE_INFO_OPTIONS above).
.P
If the pattern is not anchored and the \fIfirstcharptr\fP argument is not NULL,
it is used to pass back information about the first character of any matched
string (see PCRE_INFO_FIRSTBYTE above).
.
.
.SH "REFERENCE COUNTS"
.rs
.sp
.B int pcre_refcount(pcre *\fIcode\fP, int \fIadjust\fP);
.PP
The \fBpcre_refcount()\fP function is used to maintain a reference count in the
data block that contains a compiled pattern. It is provided for the benefit of
applications that operate in an object-oriented manner, where different parts
of the application may be using the same compiled pattern, but you want to free
the block when they are all done.
.P
When a pattern is compiled, the reference count field is initialized to zero.
It is changed only by calling this function, whose action is to add the
\fIadjust\fP value (which may be positive or negative) to it. The yield of the
function is the new value. However, the value of the count is constrained to
lie between 0 and 65535, inclusive. If the new value is outside these limits,
it is forced to the appropriate limit value.
.P
Except when it is zero, the reference count is not correctly preserved if a
pattern is compiled on one host and then transferred to a host whose byte-order
is different. (This seems a highly unlikely scenario.)
.
.
.SH "MATCHING A PATTERN: THE TRADITIONAL FUNCTION"
.rs
.sp
.B int pcre_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
.ti +5n
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP);
.P
The function \fBpcre_exec()\fP is called to match a subject string against a
compiled pattern, which is passed in the \fIcode\fP argument. If the
pattern was studied, the result of the study should be passed in the
\fIextra\fP argument. This function is the main matching facility of the
library, and it operates in a Perl-like manner. For specialist use there is
also an alternative matching function, which is described
.\" HTML <a href="#dfamatch">
.\" </a>
below
.\"
in the section about the \fBpcre_dfa_exec()\fP function.
.P
In most applications, the pattern will have been compiled (and optionally
studied) in the same process that calls \fBpcre_exec()\fP. However, it is
possible to save compiled patterns and study data, and then use them later
in different processes, possibly even on different hosts. For a discussion
about this, see the
.\" HREF
\fBpcreprecompile\fP
.\"
documentation.
.P
Here is an example of a simple call to \fBpcre_exec()\fP:
.sp
  int rc;
  int ovector[30];
  rc = pcre_exec(
    re,             /* result of pcre_compile() */
    NULL,           /* we didn't study the pattern */
    "some string",  /* the subject string */
    11,             /* the length of the subject string */
    0,              /* start at offset 0 in the subject */
    0,              /* default options */
    ovector,        /* vector of integers for substring information */
    30);            /* number of elements (NOT size in bytes) */
.
.\" HTML <a name="extradata"></a>
.SS "Extra data for \fBpcre_exec()\fR"
.rs
.sp
If the \fIextra\fP argument is not NULL, it must point to a \fBpcre_extra\fP
data block. The \fBpcre_study()\fP function returns such a block (when it
doesn't return NULL), but you can also create one for yourself, and pass
additional information in it. The \fBpcre_extra\fP block contains the following
fields (not necessarily in this order):
.sp
  unsigned long int \fIflags\fP;
  void *\fIstudy_data\fP;
  unsigned long int \fImatch_limit\fP;
  unsigned long int \fImatch_limit_recursion\fP;
  void *\fIcallout_data\fP;
  const unsigned char *\fItables\fP;
.sp
The \fIflags\fP field is a bitmap that specifies which of the other fields
are set. The flag bits are:
.sp
  PCRE_EXTRA_STUDY_DATA
  PCRE_EXTRA_MATCH_LIMIT
  PCRE_EXTRA_MATCH_LIMIT_RECURSION
  PCRE_EXTRA_CALLOUT_DATA
  PCRE_EXTRA_TABLES
.sp
Other flag bits should be set to zero. The \fIstudy_data\fP field is set in the
\fBpcre_extra\fP block that is returned by \fBpcre_study()\fP, together with
the appropriate flag bit. You should not set this yourself, but you may add to
the block by setting the other fields and their corresponding flag bits.
.P
The \fImatch_limit\fP field provides a means of preventing PCRE from using up a
vast amount of resources when running patterns that are not going to match,
but which have a very large number of possibilities in their search trees. The
classic example is a pattern that uses nested unlimited repeats.
.P
Internally, PCRE uses a function called \fBmatch()\fP which it calls repeatedly
(sometimes recursively). The limit set by \fImatch_limit\fP is imposed on the
number of times this function is called during a match, which has the effect of
limiting the amount of backtracking that can take place. For patterns that are
not anchored, the count restarts from zero for each position in the subject
string.
.P
The default value for the limit can be set when PCRE is built; the default
default is 10 million, which handles all but the most extreme cases. You can
override the default by supplying \fBpcre_exec()\fP with a \fBpcre_extra\fP
block in which \fImatch_limit\fP is set, and PCRE_EXTRA_MATCH_LIMIT is set in
the \fIflags\fP field. If the limit is exceeded, \fBpcre_exec()\fP returns
PCRE_ERROR_MATCHLIMIT.
.P
The \fImatch_limit_recursion\fP field is similar to \fImatch_limit\fP, but
instead of limiting the total number of times that \fBmatch()\fP is called, it
limits the depth of recursion. The recursion depth is a smaller number than the
total number of calls, because not all calls to \fBmatch()\fP are recursive.
This limit is of use only if it is set smaller than \fImatch_limit\fP.
.P
Limiting the recursion depth limits the amount of stack that can be used, or,
when PCRE has been compiled to use memory on the heap instead of the stack, the
amount of heap memory that can be used.
.P
The default value for \fImatch_limit_recursion\fP can be set when PCRE is
built; the default default is the same value as the default for
\fImatch_limit\fP. You can override the default by supplying \fBpcre_exec()\fP
with a \fBpcre_extra\fP block in which \fImatch_limit_recursion\fP is set, and
PCRE_EXTRA_MATCH_LIMIT_RECURSION is set in the \fIflags\fP field. If the limit
is exceeded, \fBpcre_exec()\fP returns PCRE_ERROR_RECURSIONLIMIT.
.P
The \fIcallout_data\fP field is used in conjunction with the "callout" feature,
and is described in the
.\" HREF
\fBpcrecallout\fP
.\"
documentation.
.P
The \fItables\fP field is used to pass a character tables pointer to
\fBpcre_exec()\fP; this overrides the value that is stored with the compiled
pattern. A non-NULL value is stored with the compiled pattern only if custom
tables were supplied to \fBpcre_compile()\fP via its \fItableptr\fP argument.
If NULL is passed to \fBpcre_exec()\fP using this mechanism, it forces PCRE's
internal tables to be used. This facility is helpful when re-using patterns
that have been saved after compiling with an external set of tables, because
the external tables might be at a different address when \fBpcre_exec()\fP is
called. See the
.\" HREF
\fBpcreprecompile\fP
.\"
documentation for a discussion of saving compiled patterns for later use.
.
.\" HTML <a name="execoptions"></a>
.SS "Option bits for \fBpcre_exec()\fP"
.rs
.sp
The unused bits of the \fIoptions\fP argument for \fBpcre_exec()\fP must be
zero. The only bits that may be set are PCRE_ANCHORED, PCRE_NEWLINE_\fIxxx\fP,
PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART,
PCRE_NO_START_OPTIMIZE, PCRE_NO_UTF8_CHECK, PCRE_PARTIAL_SOFT, and
PCRE_PARTIAL_HARD.
.sp
  PCRE_ANCHORED
.sp
The PCRE_ANCHORED option limits \fBpcre_exec()\fP to matching at the first
matching position. If a pattern was compiled with PCRE_ANCHORED, or turned out
to be anchored by virtue of its contents, it cannot be made unanchored at
matching time.
.sp
  PCRE_BSR_ANYCRLF
  PCRE_BSR_UNICODE
.sp
These options (which are mutually exclusive) control what the \eR escape
sequence matches. The choice is either to match only CR, LF, or CRLF, or to
match any Unicode newline sequence. These options override the choice that was
made or defaulted when the pattern was compiled.
.sp
  PCRE_NEWLINE_CR
  PCRE_NEWLINE_LF
  PCRE_NEWLINE_CRLF
  PCRE_NEWLINE_ANYCRLF
  PCRE_NEWLINE_ANY
.sp
These options override the newline definition that was chosen or defaulted when
the pattern was compiled. For details, see the description of
\fBpcre_compile()\fP above. During matching, the newline choice affects the
behaviour of the dot, circumflex, and dollar metacharacters. It may also alter
the way the match position is advanced after a match failure for an unanchored
pattern.
.P
When PCRE_NEWLINE_CRLF, PCRE_NEWLINE_ANYCRLF, or PCRE_NEWLINE_ANY is set, and a
match attempt for an unanchored pattern fails when the current position is at a
CRLF sequence, and the pattern contains no explicit matches for CR or LF
characters, the match position is advanced by two characters instead of one, in
other words, to after the CRLF.
.P
The above rule is a compromise that makes the most common cases work as
expected. For example, if the pattern is .+A (and the PCRE_DOTALL option is not
set), it does not match the string "\er\enA" because, after failing at the
start, it skips both the CR and the LF before retrying. However, the pattern
[\er\en]A does match that string, because it contains an explicit CR or LF
reference, and so advances only by one character after the first failure.
.P
An explicit match for CR or LF is either a literal appearance of one of those
characters, or one of the \er or \en escape sequences. Implicit matches such as
[^X] do not count, nor does \es (which includes CR and LF in the characters
that it matches).
.P
Notwithstanding the above, anomalous effects may still occur when CRLF is a
valid newline sequence and explicit \er or \en escapes appear in the pattern.
.sp
  PCRE_NOTBOL
.sp
This option specifies that the first character of the subject string is not the
beginning of a line, so the circumflex metacharacter should not match before
it. Setting this without PCRE_MULTILINE (at compile time) causes circumflex
never to match. This option affects only the behaviour of the circumflex
metacharacter. It does not affect \eA.
.sp
  PCRE_NOTEOL
.sp
This option specifies that the end of the subject string is not the end of a
line, so the dollar metacharacter should not match it nor (except in multiline
mode) a newline immediately before it. Setting this without PCRE_MULTILINE (at
compile time) causes dollar never to match. This option affects only the
behaviour of the dollar metacharacter. It does not affect \eZ or \ez.
.sp
  PCRE_NOTEMPTY
.sp
An empty string is not considered to be a valid match if this option is set. If
there are alternatives in the pattern, they are tried. If all the alternatives
match the empty string, the entire match fails. For example, if the pattern
.sp
  a?b?
.sp
is applied to a string not beginning with "a" or "b", it matches an empty
string at the start of the subject. With PCRE_NOTEMPTY set, this match is not
valid, so PCRE searches further into the string for occurrences of "a" or "b".
.sp
  PCRE_NOTEMPTY_ATSTART
.sp
This is like PCRE_NOTEMPTY, except that an empty string match that is not at
the start of the subject is permitted. If the pattern is anchored, such a match
can occur only if the pattern contains \eK.
.P
Perl has no direct equivalent of PCRE_NOTEMPTY or PCRE_NOTEMPTY_ATSTART, but it
does make a special case of a pattern match of the empty string within its
\fBsplit()\fP function, and when using the /g modifier. It is possible to
emulate Perl's behaviour after matching a null string by first trying the match
again at the same offset with PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED, and then
if that fails, by advancing the starting offset (see below) and trying an
ordinary match again. There is some code that demonstrates how to do this in
the
.\" HREF
\fBpcredemo\fP
.\"
sample program.
.sp
  PCRE_NO_START_OPTIMIZE
.sp
There are a number of optimizations that \fBpcre_exec()\fP uses at the start of
a match, in order to speed up the process. For example, if it is known that a
match must start with a specific character, it searches the subject for that
character, and fails immediately if it cannot find it, without actually running
the main matching function. When callouts are in use, these optimizations can
cause them to be skipped. This option disables the "start-up" optimizations,
causing performance to suffer, but ensuring that the callouts do occur.
.sp
  PCRE_NO_UTF8_CHECK
.sp
When PCRE_UTF8 is set at compile time, the validity of the subject as a UTF-8
string is automatically checked when \fBpcre_exec()\fP is subsequently called.
The value of \fIstartoffset\fP is also checked to ensure that it points to the
start of a UTF-8 character. There is a discussion about the validity of UTF-8
strings in the
.\" HTML <a href="pcre.html#utf8strings">
.\" </a>
section on UTF-8 support
.\"
in the main
.\" HREF
\fBpcre\fP
.\"
page. If an invalid UTF-8 sequence of bytes is found, \fBpcre_exec()\fP returns
the error PCRE_ERROR_BADUTF8. If \fIstartoffset\fP contains an invalid value,
PCRE_ERROR_BADUTF8_OFFSET is returned.
.P
If you already know that your subject is valid, and you want to skip these
checks for performance reasons, you can set the PCRE_NO_UTF8_CHECK option when
calling \fBpcre_exec()\fP. You might want to do this for the second and
subsequent calls to \fBpcre_exec()\fP if you are making repeated calls to find
all the matches in a single subject string. However, you should be sure that
the value of \fIstartoffset\fP points to the start of a UTF-8 character. When
PCRE_NO_UTF8_CHECK is set, the effect of passing an invalid UTF-8 string as a
subject, or a value of \fIstartoffset\fP that does not point to the start of a
UTF-8 character, is undefined. Your program may crash.
.sp
  PCRE_PARTIAL_HARD
  PCRE_PARTIAL_SOFT
.sp
These options turn on the partial matching feature. For backwards
compatibility, PCRE_PARTIAL is a synonym for PCRE_PARTIAL_SOFT. A partial match
occurs if the end of the subject string is reached successfully, but there are
not enough subject characters to complete the match. If this happens when
PCRE_PARTIAL_HARD is set, \fBpcre_exec()\fP immediately returns
PCRE_ERROR_PARTIAL. Otherwise, if PCRE_PARTIAL_SOFT is set, matching continues
by testing any other alternatives. Only if they all fail is PCRE_ERROR_PARTIAL
returned (instead of PCRE_ERROR_NOMATCH). The portion of the string that
was inspected when the partial match was found is set as the first matching
string. There is a more detailed discussion in the
.\" HREF
\fBpcrepartial\fP
.\"
documentation.
.
.SS "The string to be matched by \fBpcre_exec()\fP"
.rs
.sp
The subject string is passed to \fBpcre_exec()\fP as a pointer in
\fIsubject\fP, a length (in bytes) in \fIlength\fP, and a starting byte offset
in \fIstartoffset\fP. In UTF-8 mode, the byte offset must point to the start of
a UTF-8 character. Unlike the pattern string, the subject may contain binary
zero bytes. When the starting offset is zero, the search for a match starts at
the beginning of the subject, and this is by far the most common case.
.P
A non-zero starting offset is useful when searching for another match in the
same subject by calling \fBpcre_exec()\fP again after a previous success.
Setting \fIstartoffset\fP differs from just passing over a shortened string and
setting PCRE_NOTBOL in the case of a pattern that begins with any kind of
lookbehind. For example, consider the pattern
.sp
  \eBiss\eB
.sp
which finds occurrences of "iss" in the middle of words. (\eB matches only if
the current position in the subject is not a word boundary.) When applied to
the string "Mississippi" the first call to \fBpcre_exec()\fP finds the first
occurrence. If \fBpcre_exec()\fP is called again with just the remainder of the
subject, namely "issippi", it does not match, because \eB is always false at the
start of the subject, which is deemed to be a word boundary. However, if
\fBpcre_exec()\fP is passed the entire string again, but with \fIstartoffset\fP
set to 4, it finds the second occurrence of "iss" because it is able to look
behind the starting point to discover that it is preceded by a letter.
.P
If a non-zero starting offset is passed when the pattern is anchored, one
attempt to match at the given offset is made. This can only succeed if the
pattern does not require the match to be at the start of the subject.
.
.SS "How \fBpcre_exec()\fP returns captured substrings"
.rs
.sp
In general, a pattern matches a certain portion of the subject, and in
addition, further substrings from the subject may be picked out by parts of the
pattern. Following the usage in Jeffrey Friedl's book, this is called
"capturing" in what follows, and the phrase "capturing subpattern" is used for
a fragment of a pattern that picks out a substring. PCRE supports several other
kinds of parenthesized subpattern that do not cause substrings to be captured.
.P
Captured substrings are returned to the caller via a vector of integers whose
address is passed in \fIovector\fP. The number of elements in the vector is
passed in \fIovecsize\fP, which must be a non-negative number. \fBNote\fP: this
argument is NOT the size of \fIovector\fP in bytes.
.P
The first two-thirds of the vector is used to pass back captured substrings,
each substring using a pair of integers. The remaining third of the vector is
used as workspace by \fBpcre_exec()\fP while matching capturing subpatterns,
and is not available for passing back information. The number passed in
\fIovecsize\fP should always be a multiple of three. If it is not, it is
rounded down.
.P
When a match is successful, information about captured substrings is returned
in pairs of integers, starting at the beginning of \fIovector\fP, and
continuing up to two-thirds of its length at the most. The first element of
each pair is set to the byte offset of the first character in a substring, and
the second is set to the byte offset of the first character after the end of a
substring. \fBNote\fP: these values are always byte offsets, even in UTF-8
mode. They are not character counts.
.P
The first pair of integers, \fIovector[0]\fP and \fIovector[1]\fP, identify the
portion of the subject string matched by the entire pattern. The next pair is
used for the first capturing subpattern, and so on. The value returned by
\fBpcre_exec()\fP is one more than the highest numbered pair that has been set.
For example, if two substrings have been captured, the returned value is 3. If
there are no capturing subpatterns, the return value from a successful match is
1, indicating that just the first pair of offsets has been set.
.P
If a capturing subpattern is matched repeatedly, it is the last portion of the
string that it matched that is returned.
.P
If the vector is too small to hold all the captured substring offsets, it is
used as far as possible (up to two-thirds of its length), and the function
returns a value of zero. If the substring offsets are not of interest,
\fBpcre_exec()\fP may be called with \fIovector\fP passed as NULL and
\fIovecsize\fP as zero. However, if the pattern contains back references and
the \fIovector\fP is not big enough to remember the related substrings, PCRE
has to get additional memory for use during matching. Thus it is usually
advisable to supply an \fIovector\fP.
.P
The \fBpcre_fullinfo()\fP function can be used to find out how many capturing
subpatterns there are in a compiled pattern. The smallest size for
\fIovector\fP that will allow for \fIn\fP captured substrings, in addition to
the offsets of the substring matched by the whole pattern, is (\fIn\fP+1)*3.
.P
It is possible for capturing subpattern number \fIn+1\fP to match some part of
the subject when subpattern \fIn\fP has not been used at all. For example, if
the string "abc" is matched against the pattern (a|(z))(bc) the return from the
function is 4, and subpatterns 1 and 3 are matched, but 2 is not. When this
happens, both values in the offset pairs corresponding to unused subpatterns
are set to -1.
.P
Offset values that correspond to unused subpatterns at the end of the
expression are also set to -1. For example, if the string "abc" is matched
against the pattern (abc)(x(yz)?)? subpatterns 2 and 3 are not matched. The
return from the function is 2, because the highest used capturing subpattern
number is 1. However, you can refer to the offsets for the second and third
capturing subpatterns if you wish (assuming the vector is large enough, of
course).
.P
Some convenience functions are provided for extracting the captured substrings
as separate strings. These are described below.
.
.\" HTML <a name="errorlist"></a>
.SS "Error return values from \fBpcre_exec()\fP"
.rs
.sp
If \fBpcre_exec()\fP fails, it returns a negative number. The following are
defined in the header file:
.sp
  PCRE_ERROR_NOMATCH        (-1)
.sp
The subject string did not match the pattern.
.sp
  PCRE_ERROR_NULL           (-2)
.sp
Either \fIcode\fP or \fIsubject\fP was passed as NULL, or \fIovector\fP was
NULL and \fIovecsize\fP was not zero.
.sp
  PCRE_ERROR_BADOPTION      (-3)
.sp
An unrecognized bit was set in the \fIoptions\fP argument.
.sp
  PCRE_ERROR_BADMAGIC       (-4)
.sp
PCRE stores a 4-byte "magic number" at the start of the compiled code, to catch
the case when it is passed a junk pointer and to detect when a pattern that was
compiled in an environment of one endianness is run in an environment with the
other endianness. This is the error that PCRE gives when the magic number is
not present.
.sp
  PCRE_ERROR_UNKNOWN_OPCODE (-5)
.sp
While running the pattern match, an unknown item was encountered in the
compiled pattern. This error could be caused by a bug in PCRE or by overwriting
of the compiled pattern.
.sp
  PCRE_ERROR_NOMEMORY       (-6)
.sp
If a pattern contains back references, but the \fIovector\fP that is passed to
\fBpcre_exec()\fP is not big enough to remember the referenced substrings, PCRE
gets a block of memory at the start of matching to use for this purpose. If the
call via \fBpcre_malloc()\fP fails, this error is given. The memory is
automatically freed at the end of matching.
.sp
  PCRE_ERROR_NOSUBSTRING    (-7)
.sp
This error is used by the \fBpcre_copy_substring()\fP,
\fBpcre_get_substring()\fP, and \fBpcre_get_substring_list()\fP functions (see
below). It is never returned by \fBpcre_exec()\fP.
.sp
  PCRE_ERROR_MATCHLIMIT     (-8)
.sp
The backtracking limit, as specified by the \fImatch_limit\fP field in a
\fBpcre_extra\fP structure (or defaulted) was reached. See the description
above.
.sp
  PCRE_ERROR_CALLOUT        (-9)
.sp
This error is never generated by \fBpcre_exec()\fP itself. It is provided for
use by callout functions that want to yield a distinctive error code. See the
.\" HREF
\fBpcrecallout\fP
.\"
documentation for details.
.sp
  PCRE_ERROR_BADUTF8        (-10)
.sp
A string that contains an invalid UTF-8 byte sequence was passed as a subject.
.sp
  PCRE_ERROR_BADUTF8_OFFSET (-11)
.sp
The UTF-8 byte sequence that was passed as a subject was valid, but the value
of \fIstartoffset\fP did not point to the beginning of a UTF-8 character.
.sp
  PCRE_ERROR_PARTIAL        (-12)
.sp
The subject string did not match, but it did match partially. See the
.\" HREF
\fBpcrepartial\fP
.\"
documentation for details of partial matching.
.sp
  PCRE_ERROR_BADPARTIAL     (-13)
.sp
This code is no longer in use. It was formerly returned when the PCRE_PARTIAL
option was used with a compiled pattern containing items that were not
supported for partial matching. From release 8.00 onwards, there are no
restrictions on partial matching.
.sp
  PCRE_ERROR_INTERNAL       (-14)
.sp
An unexpected internal error has occurred. This error could be caused by a bug
in PCRE or by overwriting of the compiled pattern.
.sp
  PCRE_ERROR_BADCOUNT       (-15)
.sp
This error is given if the value of the \fIovecsize\fP argument is negative.
.sp
  PCRE_ERROR_RECURSIONLIMIT (-21)
.sp
The internal recursion limit, as specified by the \fImatch_limit_recursion\fP
field in a \fBpcre_extra\fP structure (or defaulted) was reached. See the
description above.
.sp
  PCRE_ERROR_BADNEWLINE     (-23)
.sp
An invalid combination of PCRE_NEWLINE_\fIxxx\fP options was given.
.P
Error numbers -16 to -20 and -22 are not used by \fBpcre_exec()\fP.
.
.
.SH "EXTRACTING CAPTURED SUBSTRINGS BY NUMBER"
.rs
.sp
.B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,
.ti +5n
.B int \fIbuffersize\fP);
.PP
.B int pcre_get_substring(const char *\fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP,
.ti +5n
.B const char **\fIstringptr\fP);
.PP
.B int pcre_get_substring_list(const char *\fIsubject\fP,
.ti +5n
.B int *\fIovector\fP, int \fIstringcount\fP, "const char ***\fIlistptr\fP);"
.PP
Captured substrings can be accessed directly by using the offsets returned by
\fBpcre_exec()\fP in \fIovector\fP. For convenience, the functions
\fBpcre_copy_substring()\fP, \fBpcre_get_substring()\fP, and
\fBpcre_get_substring_list()\fP are provided for extracting captured substrings
as new, separate, zero-terminated strings. These functions identify substrings
by number. The next section describes functions for extracting named
substrings.
.P
A substring that contains a binary zero is correctly extracted and has a
further zero added on the end, but the result is not, of course, a C string.
However, you can process such a string by referring to the length that is
returned by \fBpcre_copy_substring()\fP and \fBpcre_get_substring()\fP.
Unfortunately, the interface to \fBpcre_get_substring_list()\fP is not adequate
for handling strings containing binary zeros, because the end of the final
string is not independently indicated.
.P
The first three arguments are the same for all three of these functions:
\fIsubject\fP is the subject string that has just been successfully matched,
\fIovector\fP is a pointer to the vector of integer offsets that was passed to
\fBpcre_exec()\fP, and \fIstringcount\fP is the number of substrings that were
captured by the match, including the substring that matched the entire regular
expression. This is the value returned by \fBpcre_exec()\fP if it is greater
than zero. If \fBpcre_exec()\fP returned zero, indicating that it ran out of
space in \fIovector\fP, the value passed as \fIstringcount\fP should be the
number of elements in the vector divided by three.
.P
The functions \fBpcre_copy_substring()\fP and \fBpcre_get_substring()\fP
extract a single substring, whose number is given as \fIstringnumber\fP. A
value of zero extracts the substring that matched the entire pattern, whereas
higher values extract the captured substrings. For \fBpcre_copy_substring()\fP,
the string is placed in \fIbuffer\fP, whose length is given by
\fIbuffersize\fP, while for \fBpcre_get_substring()\fP a new block of memory is
obtained via \fBpcre_malloc\fP, and its address is returned via
\fIstringptr\fP. The yield of the function is the length of the string, not
including the terminating zero, or one of these error codes:
.sp
  PCRE_ERROR_NOMEMORY       (-6)
.sp
The buffer was too small for \fBpcre_copy_substring()\fP, or the attempt to get
memory failed for \fBpcre_get_substring()\fP.
.sp
  PCRE_ERROR_NOSUBSTRING    (-7)
.sp
There is no substring whose number is \fIstringnumber\fP.
.P
The \fBpcre_get_substring_list()\fP function extracts all available substrings
and builds a list of pointers to them. All this is done in a single block of
memory that is obtained via \fBpcre_malloc\fP. The address of the memory block
is returned via \fIlistptr\fP, which is also the start of the list of string
pointers. The end of the list is marked by a NULL pointer. The yield of the
function is zero if all went well, or the error code
.sp
  PCRE_ERROR_NOMEMORY       (-6)
.sp
if the attempt to get the memory block failed.
.P
When any of these functions encounter a substring that is unset, which can
happen when capturing subpattern number \fIn+1\fP matches some part of the
subject, but subpattern \fIn\fP has not been used at all, they return an empty
string. This can be distinguished from a genuine zero-length substring by
inspecting the appropriate offset in \fIovector\fP, which is negative for unset
substrings.
.P
The two convenience functions \fBpcre_free_substring()\fP and
\fBpcre_free_substring_list()\fP can be used to free the memory returned by
a previous call of \fBpcre_get_substring()\fP or
\fBpcre_get_substring_list()\fP, respectively. They do nothing more than call
the function pointed to by \fBpcre_free\fP, which of course could be called
directly from a C program. However, PCRE is used in some situations where it is
linked via a special interface to another programming language that cannot use
\fBpcre_free\fP directly; it is for these cases that the functions are
provided.
.
.
.SH "EXTRACTING CAPTURED SUBSTRINGS BY NAME"
.rs
.sp
.B int pcre_get_stringnumber(const pcre *\fIcode\fP,
.ti +5n
.B const char *\fIname\fP);
.PP
.B int pcre_copy_named_substring(const pcre *\fIcode\fP,
.ti +5n
.B const char *\fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, const char *\fIstringname\fP,
.ti +5n
.B char *\fIbuffer\fP, int \fIbuffersize\fP);
.PP
.B int pcre_get_named_substring(const pcre *\fIcode\fP,
.ti +5n
.B const char *\fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, const char *\fIstringname\fP,
.ti +5n
.B const char **\fIstringptr\fP);
.PP
To extract a substring by name, you first have to find the associated number.
For example, for this pattern
.sp
  (a+)b(?<xxx>\ed+)...
.sp
the number of the subpattern called "xxx" is 2. If the name is known to be
unique (PCRE_DUPNAMES was not set), you can find the number from the name by
calling \fBpcre_get_stringnumber()\fP. The first argument is the compiled
pattern, and the second is the name. The yield of the function is the
subpattern number, or PCRE_ERROR_NOSUBSTRING (-7) if there is no subpattern of
that name.
.P
Given the number, you can extract the substring directly, or use one of the
functions described in the previous section. For convenience, there are also
two functions that do the whole job.
.P
Most of the arguments of \fBpcre_copy_named_substring()\fP and
\fBpcre_get_named_substring()\fP are the same as those for the similarly named
functions that extract by number. As these are described in the previous
section, they are not re-described here. There are just two differences:
.P
First, instead of a substring number, a substring name is given. Second, there
is an extra argument, given at the start, which is a pointer to the compiled
pattern. This is needed in order to gain access to the name-to-number
translation table.
.P
These functions call \fBpcre_get_stringnumber()\fP, and if it succeeds, they
then call \fBpcre_copy_substring()\fP or \fBpcre_get_substring()\fP, as
appropriate. \fBNOTE:\fP If PCRE_DUPNAMES is set and there are duplicate names,
the behaviour may not be what you want (see the next section).
.P
\fBWarning:\fP If the pattern uses the (?| feature to set up multiple
subpatterns with the same number, as described in the
.\" HTML <a href="pcrepattern.html#dupsubpatternnumber">
.\" </a>
section on duplicate subpattern numbers
.\"
in the
.\" HREF
\fBpcrepattern\fP
.\"
page, you cannot use names to distinguish the different subpatterns, because
names are not included in the compiled code. The matching process uses only
numbers. For this reason, the use of different names for subpatterns of the
same number causes an error at compile time.
.
.SH "DUPLICATE SUBPATTERN NAMES"
.rs
.sp
.B int pcre_get_stringtable_entries(const pcre *\fIcode\fP,
.ti +5n
.B const char *\fIname\fP, char **\fIfirst\fP, char **\fIlast\fP);
.PP
When a pattern is compiled with the PCRE_DUPNAMES option, names for subpatterns
are not required to be unique. (Duplicate names are always allowed for
subpatterns with the same number, created by using the (?| feature. Indeed, if
such subpatterns are named, they are required to use the same names.)
.P
Normally, patterns with duplicate names are such that in any one match, only
one of the named subpatterns participates. An example is shown in the
.\" HREF
\fBpcrepattern\fP
.\"
documentation.
.P
When duplicates are present, \fBpcre_copy_named_substring()\fP and
\fBpcre_get_named_substring()\fP return the first substring corresponding to
the given name that is set. If none are set, PCRE_ERROR_NOSUBSTRING (-7) is
returned; no data is returned. The \fBpcre_get_stringnumber()\fP function
returns one of the numbers that are associated with the name, but it is not
defined which it is.
.P
If you want to get full details of all captured substrings for a given name,
you must use the \fBpcre_get_stringtable_entries()\fP function. The first
argument is the compiled pattern, and the second is the name. The third and
fourth are pointers to variables which are updated by the function. After it
has run, they point to the first and last entries in the name-to-number table
for the given name. The function itself returns the length of each entry, or
PCRE_ERROR_NOSUBSTRING (-7) if there are none. The format of the table is
described above in the section entitled \fIInformation about a pattern\fP.
Given all the relevant entries for the name, you can extract each of their
numbers, and hence the captured data, if any.
.
.
.SH "FINDING ALL POSSIBLE MATCHES"
.rs
.sp
The traditional matching function uses a similar algorithm to Perl, which stops
when it finds the first match, starting at a given point in the subject. If you
want to find all possible matches, or the longest possible match, consider
using the alternative matching function (see below) instead. If you cannot use
the alternative function, but still need to find all possible matches, you
can kludge it up by making use of the callout facility, which is described in
the
.\" HREF
\fBpcrecallout\fP
.\"
documentation.
.P
What you have to do is to insert a callout right at the end of the pattern.
When your callout function is called, extract and save the current matched
substring. Then return 1, which forces \fBpcre_exec()\fP to backtrack and try
other alternatives. Ultimately, when it runs out of matches, \fBpcre_exec()\fP
will yield PCRE_ERROR_NOMATCH.
.
.
.\" HTML <a name="dfamatch"></a>
.SH "MATCHING A PATTERN: THE ALTERNATIVE FUNCTION"
.rs
.sp
.B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
.ti +5n
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
.ti +5n
.B int *\fIworkspace\fP, int \fIwscount\fP);
.P
The function \fBpcre_dfa_exec()\fP is called to match a subject string against
a compiled pattern, using a matching algorithm that scans the subject string
just once, and does not backtrack. This has different characteristics to the
normal algorithm, and is not compatible with Perl. Some of the features of PCRE
patterns are not supported. Nevertheless, there are times when this kind of
matching can be useful. For a discussion of the two matching algorithms, and a
list of features that \fBpcre_dfa_exec()\fP does not support, see the
.\" HREF
\fBpcrematching\fP
.\"
documentation.
.P
The arguments for the \fBpcre_dfa_exec()\fP function are the same as for
\fBpcre_exec()\fP, plus two extras. The \fIovector\fP argument is used in a
different way, and this is described below. The other common arguments are used
in the same way as for \fBpcre_exec()\fP, so their description is not repeated
here.
.P
The two additional arguments provide workspace for the function. The workspace
vector should contain at least 20 elements. It is used for keeping track of
multiple paths through the pattern tree. More workspace will be needed for
patterns and subjects where there are a lot of potential matches.
.P
Here is an example of a simple call to \fBpcre_dfa_exec()\fP:
.sp
  int rc;
  int ovector[10];
  int wspace[20];
  rc = pcre_dfa_exec(
    re,             /* result of pcre_compile() */
    NULL,           /* we didn't study the pattern */
    "some string",  /* the subject string */
    11,             /* the length of the subject string */
    0,              /* start at offset 0 in the subject */
    0,              /* default options */
    ovector,        /* vector of integers for substring information */
    10,             /* number of elements (NOT size in bytes) */
    wspace,         /* working space vector */
    20);            /* number of elements (NOT size in bytes) */
.
.SS "Option bits for \fBpcre_dfa_exec()\fP"
.rs
.sp
The unused bits of the \fIoptions\fP argument for \fBpcre_dfa_exec()\fP must be
zero. The only bits that may be set are PCRE_ANCHORED, PCRE_NEWLINE_\fIxxx\fP,
PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART,
PCRE_NO_UTF8_CHECK, PCRE_PARTIAL_HARD, PCRE_PARTIAL_SOFT, PCRE_DFA_SHORTEST,
and PCRE_DFA_RESTART. All but the last four of these are exactly the same as
for \fBpcre_exec()\fP, so their description is not repeated here.
.sp
  PCRE_PARTIAL_HARD
  PCRE_PARTIAL_SOFT
.sp
These have the same general effect as they do for \fBpcre_exec()\fP, but the
details are slightly different. When PCRE_PARTIAL_HARD is set for
\fBpcre_dfa_exec()\fP, it returns PCRE_ERROR_PARTIAL if the end of the subject
is reached and there is still at least one matching possibility that requires
additional characters. This happens even if some complete matches have also
been found. When PCRE_PARTIAL_SOFT is set, the return code PCRE_ERROR_NOMATCH
is converted into PCRE_ERROR_PARTIAL if the end of the subject is reached,
there have been no complete matches, but there is still at least one matching
possibility. The portion of the string that was inspected when the longest
partial match was found is set as the first matching string in both cases.
.sp
  PCRE_DFA_SHORTEST
.sp
Setting the PCRE_DFA_SHORTEST option causes the matching algorithm to stop as
soon as it has found one match. Because of the way the alternative algorithm
works, this is necessarily the shortest possible match at the first possible
matching point in the subject string.
.sp
  PCRE_DFA_RESTART
.sp
When \fBpcre_dfa_exec()\fP returns a partial match, it is possible to call it
again, with additional subject characters, and have it continue with the same
match. The PCRE_DFA_RESTART option requests this action; when it is set, the
\fIworkspace\fP and \fIwscount\fP arguments must reference the same vector as
before because data about the match so far is left in them after a partial
match. There is more discussion of this facility in the
.\" HREF
\fBpcrepartial\fP
.\"
documentation.
.
.SS "Successful returns from \fBpcre_dfa_exec()\fP"
.rs
.sp
When \fBpcre_dfa_exec()\fP succeeds, it may have matched more than one
substring in the subject. Note, however, that all the matches from one run of
the function start at the same point in the subject. The shorter matches are
all initial substrings of the longer matches. For example, if the pattern
.sp
  <.*>
.sp
is matched against the string
.sp
  This is <something> <something else> <something further> no more
.sp
the three matched strings are
.sp
  <something>
  <something> <something else>
  <something> <something else> <something further>
.sp
On success, the yield of the function is a number greater than zero, which is
the number of matched substrings. The substrings themselves are returned in
\fIovector\fP. Each string uses two elements; the first is the offset to the
start, and the second is the offset to the end. In fact, all the strings have
the same start offset. (Space could have been saved by giving this only once,
but it was decided to retain some compatibility with the way \fBpcre_exec()\fP
returns data, even though the meaning of the strings is different.)
.P
The strings are returned in reverse order of length; that is, the longest
matching string is given first. If there were too many matches to fit into
\fIovector\fP, the yield of the function is zero, and the vector is filled with
the longest matches.
.
.SS "Error returns from \fBpcre_dfa_exec()\fP"
.rs
.sp
The \fBpcre_dfa_exec()\fP function returns a negative number when it fails.
Many of the errors are the same as for \fBpcre_exec()\fP, and these are
described
.\" HTML <a href="#errorlist">
.\" </a>
above.
.\"
There are in addition the following errors that are specific to
\fBpcre_dfa_exec()\fP:
.sp
  PCRE_ERROR_DFA_UITEM      (-16)
.sp
This return is given if \fBpcre_dfa_exec()\fP encounters an item in the pattern
that it does not support, for instance, the use of \eC or a back reference.
.sp
  PCRE_ERROR_DFA_UCOND      (-17)
.sp
This return is given if \fBpcre_dfa_exec()\fP encounters a condition item that
uses a back reference for the condition, or a test for recursion in a specific
group. These are not supported.
.sp
  PCRE_ERROR_DFA_UMLIMIT    (-18)
.sp
This return is given if \fBpcre_dfa_exec()\fP is called with an \fIextra\fP
block that contains a setting of the \fImatch_limit\fP field. This is not
supported (it is meaningless).
.sp
  PCRE_ERROR_DFA_WSSIZE     (-19)
.sp
This return is given if \fBpcre_dfa_exec()\fP runs out of space in the
\fIworkspace\fP vector.
.sp
  PCRE_ERROR_DFA_RECURSE    (-20)
.sp
When a recursive subpattern is processed, the matching function calls itself
recursively, using private vectors for \fIovector\fP and \fIworkspace\fP. This
error is given if the output vector is not large enough. This should be
extremely rare, as a vector of size 1000 is used.
.
.
.SH "SEE ALSO"
.rs
.sp
\fBpcrebuild\fP(3), \fBpcrecallout\fP(3), \fBpcrecpp\fP(3),
\fBpcrematching\fP(3), \fBpcrepartial\fP(3), \fBpcreposix\fP(3),
\fBpcreprecompile\fP(3), \fBpcresample\fP(3), \fBpcrestack\fP(3).
.
.
.SH AUTHOR
.rs
.sp
.nf
Philip Hazel
University Computing Service
Cambridge CB2 3QH, England.
.fi
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 03 October 2009
Copyright (c) 1997-2009 University of Cambridge.
.fi
usr/share/man/man3/pcresyntax.3000064400000025301150403561440012357 0ustar00.TH PCRESYNTAX 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PCRE REGULAR EXPRESSION SYNTAX SUMMARY"
.rs
.sp
The full syntax and semantics of the regular expressions that are supported by
PCRE are described in the
.\" HREF
\fBpcrepattern\fP
.\"
documentation. This document contains just a quick-reference summary of the
syntax.
.
.
.SH "QUOTING"
.rs
.sp
  \ex         where x is non-alphanumeric is a literal x
  \eQ...\eE    treat enclosed characters as literal
.
.
.SH "CHARACTERS"
.rs
.sp
  \ea         alarm, that is, the BEL character (hex 07)
  \ecx        "control-x", where x is any character
  \ee         escape (hex 1B)
  \ef         formfeed (hex 0C)
  \en         newline (hex 0A)
  \er         carriage return (hex 0D)
  \et         tab (hex 09)
  \eddd       character with octal code ddd, or backreference
  \exhh       character with hex code hh
  \ex{hhh..}  character with hex code hhh..
.
.
.SH "CHARACTER TYPES"
.rs
.sp
  .          any character except newline;
               in dotall mode, any character whatsoever
  \eC         one byte, even in UTF-8 mode (best avoided)
  \ed         a decimal digit
  \eD         a character that is not a decimal digit
  \eh         a horizontal whitespace character
  \eH         a character that is not a horizontal whitespace character
  \ep{\fIxx\fP}     a character with the \fIxx\fP property
  \eP{\fIxx\fP}     a character without the \fIxx\fP property
  \eR         a newline sequence
  \es         a whitespace character
  \eS         a character that is not a whitespace character
  \ev         a vertical whitespace character
  \eV         a character that is not a vertical whitespace character
  \ew         a "word" character
  \eW         a "non-word" character
  \eX         an extended Unicode sequence
.sp
In PCRE, \ed, \eD, \es, \eS, \ew, and \eW recognize only ASCII characters.
.
.
.SH "GENERAL CATEGORY PROPERTY CODES FOR \ep AND \eP"
.rs
.sp
  C          Other
  Cc         Control
  Cf         Format
  Cn         Unassigned
  Co         Private use
  Cs         Surrogate
.sp
  L          Letter
  Ll         Lower case letter
  Lm         Modifier letter
  Lo         Other letter
  Lt         Title case letter
  Lu         Upper case letter
  L&         Ll, Lu, or Lt
.sp
  M          Mark
  Mc         Spacing mark
  Me         Enclosing mark
  Mn         Non-spacing mark
.sp
  N          Number
  Nd         Decimal number
  Nl         Letter number
  No         Other number
.sp
  P          Punctuation
  Pc         Connector punctuation
  Pd         Dash punctuation
  Pe         Close punctuation
  Pf         Final punctuation
  Pi         Initial punctuation
  Po         Other punctuation
  Ps         Open punctuation
.sp
  S          Symbol
  Sc         Currency symbol
  Sk         Modifier symbol
  Sm         Mathematical symbol
  So         Other symbol
.sp
  Z          Separator
  Zl         Line separator
  Zp         Paragraph separator
  Zs         Space separator
.
.
.SH "SCRIPT NAMES FOR \ep AND \eP"
.rs
.sp
Arabic,
Armenian,
Avestan,
Balinese,
Bamum,
Bengali,
Bopomofo,
Braille,
Buginese,
Buhid,
Canadian_Aboriginal,
Carian,
Cham,
Cherokee,
Common,
Coptic,
Cuneiform,
Cypriot,
Cyrillic,
Deseret,
Devanagari,
Egyptian_Hieroglyphs,
Ethiopic,
Georgian,
Glagolitic,
Gothic,
Greek,
Gujarati,
Gurmukhi,
Han,
Hangul,
Hanunoo,
Hebrew,
Hiragana,
Imperial_Aramaic,
Inherited,
Inscriptional_Pahlavi,
Inscriptional_Parthian,
Javanese,
Kaithi,
Kannada,
Katakana,
Kayah_Li,
Kharoshthi,
Khmer,
Lao,
Latin,
Lepcha,
Limbu,
Linear_B,
Lisu,
Lycian,
Lydian,
Malayalam,
Meetei_Mayek,
Mongolian,
Myanmar,
New_Tai_Lue,
Nko,
Ogham,
Old_Italic,
Old_Persian,
Old_South_Arabian,
Old_Turkic,
Ol_Chiki,
Oriya,
Osmanya,
Phags_Pa,
Phoenician,
Rejang,
Runic,
Samaritan,
Saurashtra,
Shavian,
Sinhala,
Sundanese,
Syloti_Nagri,
Syriac,
Tagalog,
Tagbanwa,
Tai_Le,
Tai_Tham,
Tai_Viet,
Tamil,
Telugu,
Thaana,
Thai,
Tibetan,
Tifinagh,
Ugaritic,
Vai,
Yi.
.
.
.SH "CHARACTER CLASSES"
.rs
.sp
  [...]       positive character class
  [^...]      negative character class
  [x-y]       range (can be used for hex characters)
  [[:xxx:]]   positive POSIX named set
  [[:^xxx:]]  negative POSIX named set
.sp
  alnum       alphanumeric
  alpha       alphabetic
  ascii       0-127
  blank       space or tab
  cntrl       control character
  digit       decimal digit
  graph       printing, excluding space
  lower       lower case letter
  print       printing, including space
  punct       printing, excluding alphanumeric
  space       whitespace
  upper       upper case letter
  word        same as \ew
  xdigit      hexadecimal digit
.sp
In PCRE, POSIX character set names recognize only ASCII characters. You can use
\eQ...\eE inside a character class.
.
.
.SH "QUANTIFIERS"
.rs
.sp
  ?           0 or 1, greedy
  ?+          0 or 1, possessive
  ??          0 or 1, lazy
  *           0 or more, greedy
  *+          0 or more, possessive
  *?          0 or more, lazy
  +           1 or more, greedy
  ++          1 or more, possessive
  +?          1 or more, lazy
  {n}         exactly n
  {n,m}       at least n, no more than m, greedy
  {n,m}+      at least n, no more than m, possessive
  {n,m}?      at least n, no more than m, lazy
  {n,}        n or more, greedy
  {n,}+       n or more, possessive
  {n,}?       n or more, lazy
.
.
.SH "ANCHORS AND SIMPLE ASSERTIONS"
.rs
.sp
  \eb          word boundary (only ASCII letters recognized)
  \eB          not a word boundary
  ^           start of subject
               also after internal newline in multiline mode
  \eA          start of subject
  $           end of subject
               also before newline at end of subject
               also before internal newline in multiline mode
  \eZ          end of subject
               also before newline at end of subject
  \ez          end of subject
  \eG          first matching position in subject
.
.
.SH "MATCH POINT RESET"
.rs
.sp
  \eK          reset start of match
.
.
.SH "ALTERNATION"
.rs
.sp
  expr|expr|expr...
.
.
.SH "CAPTURING"
.rs
.sp
  (...)           capturing group
  (?<name>...)    named capturing group (Perl)
  (?'name'...)    named capturing group (Perl)
  (?P<name>...)   named capturing group (Python)
  (?:...)         non-capturing group
  (?|...)         non-capturing group; reset group numbers for
                   capturing groups in each alternative
.
.
.SH "ATOMIC GROUPS"
.rs
.sp
  (?>...)         atomic, non-capturing group
.
.
.
.
.SH "COMMENT"
.rs
.sp
  (?#....)        comment (not nestable)
.
.
.SH "OPTION SETTING"
.rs
.sp
  (?i)            caseless
  (?J)            allow duplicate names
  (?m)            multiline
  (?s)            single line (dotall)
  (?U)            default ungreedy (lazy)
  (?x)            extended (ignore white space)
  (?-...)         unset option(s)
.sp
The following is recognized only at the start of a pattern or after one of the
newline-setting options with similar syntax:
.sp
  (*UTF8)         set UTF-8 mode
.
.
.SH "LOOKAHEAD AND LOOKBEHIND ASSERTIONS"
.rs
.sp
  (?=...)         positive look ahead
  (?!...)         negative look ahead
  (?<=...)        positive look behind
  (?<!...)        negative look behind
.sp
Each top-level branch of a look behind must be of a fixed length.
.
.
.SH "BACKREFERENCES"
.rs
.sp
  \en              reference by number (can be ambiguous)
  \egn             reference by number
  \eg{n}           reference by number
  \eg{-n}          relative reference by number
  \ek<name>        reference by name (Perl)
  \ek'name'        reference by name (Perl)
  \eg{name}        reference by name (Perl)
  \ek{name}        reference by name (.NET)
  (?P=name)       reference by name (Python)
.
.
.SH "SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)"
.rs
.sp
  (?R)            recurse whole pattern
  (?n)            call subpattern by absolute number
  (?+n)           call subpattern by relative number
  (?-n)           call subpattern by relative number
  (?&name)        call subpattern by name (Perl)
  (?P>name)       call subpattern by name (Python)
  \eg<name>        call subpattern by name (Oniguruma)
  \eg'name'        call subpattern by name (Oniguruma)
  \eg<n>           call subpattern by absolute number (Oniguruma)
  \eg'n'           call subpattern by absolute number (Oniguruma)
  \eg<+n>          call subpattern by relative number (PCRE extension)
  \eg'+n'          call subpattern by relative number (PCRE extension)
  \eg<-n>          call subpattern by relative number (PCRE extension)
  \eg'-n'          call subpattern by relative number (PCRE extension)
.
.
.SH "CONDITIONAL PATTERNS"
.rs
.sp
  (?(condition)yes-pattern)
  (?(condition)yes-pattern|no-pattern)
.sp
  (?(n)...        absolute reference condition
  (?(+n)...       relative reference condition
  (?(-n)...       relative reference condition
  (?(<name>)...   named reference condition (Perl)
  (?('name')...   named reference condition (Perl)
  (?(name)...     named reference condition (PCRE)
  (?(R)...        overall recursion condition
  (?(Rn)...       specific group recursion condition
  (?(R&name)...   specific recursion condition
  (?(DEFINE)...   define subpattern for reference
  (?(assert)...   assertion condition
.
.
.SH "BACKTRACKING CONTROL"
.rs
.sp
The following act as soon as they are reached:
.sp
  (*ACCEPT)       force successful match
  (*FAIL)         force backtrack; synonym (*F)
.sp
The following act only when a subsequent match failure causes a backtrack to
reach them. They all force a match failure, but they differ in what happens
afterwards. Those that advance the start-of-match point do so only if the
pattern is not anchored.
.sp
  (*COMMIT)       overall failure, no advance of starting point
  (*PRUNE)        advance to next starting character
  (*SKIP)         advance start to current matching position
  (*THEN)         local failure, backtrack to next alternation
.
.
.SH "NEWLINE CONVENTIONS"
.rs
.sp
These are recognized only at the very start of the pattern or after a
(*BSR_...) or (*UTF8) option.
.sp
  (*CR)           carriage return only
  (*LF)           linefeed only
  (*CRLF)         carriage return followed by linefeed
  (*ANYCRLF)      all three of the above
  (*ANY)          any Unicode newline sequence
.
.
.SH "WHAT \eR MATCHES"
.rs
.sp
These are recognized only at the very start of the pattern or after a
(*...) option that sets the newline convention or UTF-8 mode.
.sp
  (*BSR_ANYCRLF)  CR, LF, or CRLF
  (*BSR_UNICODE)  any Unicode newline sequence
.
.
.SH "CALLOUTS"
.rs
.sp
  (?C)      callout
  (?Cn)     callout with data n
.
.
.SH "SEE ALSO"
.rs
.sp
\fBpcrepattern\fP(3), \fBpcreapi\fP(3), \fBpcrecallout\fP(3),
\fBpcrematching\fP(3), \fBpcre\fP(3).
.
.
.SH AUTHOR
.rs
.sp
.nf
Philip Hazel
University Computing Service
Cambridge CB2 3QH, England.
.fi
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 01 March 2010
Copyright (c) 1997-2010 University of Cambridge.
.fi
usr/share/man/man3/pcrecompat.3000064400000015457150403561440012327 0ustar00.TH PCRECOMPAT 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "DIFFERENCES BETWEEN PCRE AND PERL"
.rs
.sp
This document describes the differences in the ways that PCRE and Perl handle
regular expressions. The differences described here are with respect to Perl
5.10.
.P
1. PCRE has only a subset of Perl's UTF-8 and Unicode support. Details of what
it does have are given in the
.\" HTML <a href="pcre.html#utf8support">
.\" </a>
section on UTF-8 support
.\"
in the main
.\" HREF
\fBpcre\fP
.\"
page.
.P
2. PCRE does not allow repeat quantifiers on lookahead assertions. Perl permits
them, but they do not mean what you might think. For example, (?!a){3} does
not assert that the next three characters are not "a". It just asserts that the
next character is not "a" three times.
.P
3. Capturing subpatterns that occur inside negative lookahead assertions are
counted, but their entries in the offsets vector are never set. Perl sets its
numerical variables from any such patterns that are matched before the
assertion fails to match something (thereby succeeding), but only if the
negative lookahead assertion contains just one branch.
.P
4. Though binary zero characters are supported in the subject string, they are
not allowed in a pattern string because it is passed as a normal C string,
terminated by zero. The escape sequence \e0 can be used in the pattern to
represent a binary zero.
.P
5. The following Perl escape sequences are not supported: \el, \eu, \eL,
\eU, and \eN. In fact these are implemented by Perl's general string-handling
and are not part of its pattern matching engine. If any of these are
encountered by PCRE, an error is generated.
.P
6. The Perl escape sequences \ep, \eP, and \eX are supported only if PCRE is
built with Unicode character property support. The properties that can be
tested with \ep and \eP are limited to the general category properties such as
Lu and Nd, script names such as Greek or Han, and the derived properties Any
and L&. PCRE does support the Cs (surrogate) property, which Perl does not; the
Perl documentation says "Because Perl hides the need for the user to understand
the internal representation of Unicode characters, there is no need to
implement the somewhat messy concept of surrogates."
.P
7. PCRE does support the \eQ...\eE escape for quoting substrings. Characters in
between are treated as literals. This is slightly different from Perl in that $
and @ are also handled as literals inside the quotes. In Perl, they cause
variable interpolation (but of course PCRE does not have variables). Note the
following examples:
.sp
    Pattern            PCRE matches      Perl matches
.sp
.\" JOIN
    \eQabc$xyz\eE        abc$xyz           abc followed by the
                                           contents of $xyz
    \eQabc\e$xyz\eE       abc\e$xyz          abc\e$xyz
    \eQabc\eE\e$\eQxyz\eE   abc$xyz           abc$xyz
.sp
The \eQ...\eE sequence is recognized both inside and outside character classes.
.P
8. Fairly obviously, PCRE does not support the (?{code}) and (??{code})
constructions. However, there is support for recursive patterns. This is not
available in Perl 5.8, but it is in Perl 5.10. Also, the PCRE "callout"
feature allows an external function to be called during pattern matching. See
the
.\" HREF
\fBpcrecallout\fP
.\"
documentation for details.
.P
9. Subpatterns that are called recursively or as "subroutines" are always
treated as atomic groups in PCRE. This is like Python, but unlike Perl. There
is a discussion of an example that explains this in more detail in the
.\" HTML <a href="pcrepattern.html#recursiondifference">
.\" </a>
section on recursion differences from Perl
.\"
in the
.\" HREF
\fBpcrepattern\fP
.\"
page.
.P
10. There are some differences that are concerned with the settings of captured
strings when part of a pattern is repeated. For example, matching "aba" against
the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE it is set to "b".
.P
11. PCRE does support Perl 5.10's backtracking verbs (*ACCEPT), (*FAIL), (*F),
(*COMMIT), (*PRUNE), (*SKIP), and (*THEN), but only in the forms without an
argument. PCRE does not support (*MARK).
.P
12. PCRE's handling of duplicate subpattern numbers and duplicate subpattern
names is not as general as Perl's. This is a consequence of the fact that PCRE
works internally just with numbers, using an external table to translate
between numbers and names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)),
where the two capturing parentheses have the same number but different names,
is not supported, and causes an error at compile time. If it were allowed, it
would not be possible to distinguish which parentheses matched, because both
names map to capturing subpattern number 1. To avoid this confusing situation,
an error is given at compile time.
.P
13. PCRE provides some extensions to the Perl regular expression facilities.
Perl 5.10 includes new features that are not in earlier versions of Perl, some
of which (such as named parentheses) have been in PCRE for some time. This list
is with respect to Perl 5.10:
.sp
(a) Although lookbehind assertions in PCRE must match fixed length strings,
each alternative branch of a lookbehind assertion can match a different length
of string. Perl requires them all to have the same length.
.sp
(b) If PCRE_DOLLAR_ENDONLY is set and PCRE_MULTILINE is not set, the $
meta-character matches only at the very end of the string.
.sp
(c) If PCRE_EXTRA is set, a backslash followed by a letter with no special
meaning is faulted. Otherwise, like Perl, the backslash is quietly ignored.
(Perl can be made to issue a warning.)
.sp
(d) If PCRE_UNGREEDY is set, the greediness of the repetition quantifiers is
inverted, that is, by default they are not greedy, but if followed by a
question mark they are.
.sp
(e) PCRE_ANCHORED can be used at matching time to force a pattern to be tried
only at the first matching position in the subject string.
.sp
(f) The PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART, and
PCRE_NO_AUTO_CAPTURE options for \fBpcre_exec()\fP have no Perl equivalents.
.sp
(g) The \eR escape sequence can be restricted to match only CR, LF, or CRLF
by the PCRE_BSR_ANYCRLF option.
.sp
(h) The callout facility is PCRE-specific.
.sp
(i) The partial matching facility is PCRE-specific.
.sp
(j) Patterns compiled by PCRE can be saved and re-used at a later time, even on
different hosts that have the other endianness.
.sp
(k) The alternative matching function (\fBpcre_dfa_exec()\fP) matches in a
different way and is not Perl-compatible.
.sp
(l) PCRE recognizes some special sequences such as (*CR) at the start of
a pattern that set overall options that cannot be changed within the pattern.
.
.
.SH AUTHOR
.rs
.sp
.nf
Philip Hazel
University Computing Service
Cambridge CB2 3QH, England.
.fi
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 04 October 2009
Copyright (c) 1997-2009 University of Cambridge.
.fi
usr/share/man/man3/pcre_free_substring.3000064400000001104150403561440014204 0ustar00.TH PCRE_FREE_SUBSTRING 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B void pcre_free_substring(const char *\fIstringptr\fP);
.
.SH DESCRIPTION
.rs
.sp
This is a convenience function for freeing the store obtained by a previous
call to \fBpcre_get_substring()\fP or \fBpcre_get_named_substring()\fP. Its
only argument is a pointer to the string.
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fP
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fP
.\"
page.
usr/share/man/man3/pcre_compile2.3000064400000005775150403561440012717 0ustar00.TH PCRE_COMPILE2 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B pcre *pcre_compile2(const char *\fIpattern\fP, int \fIoptions\fP,
.ti +5n
.B int *\fIerrorcodeptr\fP,
.ti +5n
.B const char **\fIerrptr\fP, int *\fIerroffset\fP,
.ti +5n
.B const unsigned char *\fItableptr\fP);
.
.SH DESCRIPTION
.rs
.sp
This function compiles a regular expression into an internal form. It is the
same as \fBpcre_compile()\fP, except for the addition of the \fIerrorcodeptr\fP
argument. The arguments are:

.sp
  \fIpattern\fR       A zero-terminated string containing the
                  regular expression to be compiled
  \fIoptions\fR       Zero or more option bits
  \fIerrorcodeptr\fP  Where to put an error code
  \fIerrptr\fR        Where to put an error message
  \fIerroffset\fR     Offset in pattern where error was found
  \fItableptr\fR      Pointer to character tables, or NULL to
                  use the built-in default
.sp
The option bits are:
.sp
  PCRE_ANCHORED           Force pattern anchoring
  PCRE_AUTO_CALLOUT       Compile automatic callouts
  PCRE_BSR_ANYCRLF        \eR matches only CR, LF, or CRLF
  PCRE_BSR_UNICODE        \eR matches all Unicode line endings
  PCRE_CASELESS           Do caseless matching
  PCRE_DOLLAR_ENDONLY     $ not to match newline at end
  PCRE_DOTALL             . matches anything including NL
  PCRE_DUPNAMES           Allow duplicate names for subpatterns
  PCRE_EXTENDED           Ignore whitespace and # comments
  PCRE_EXTRA              PCRE extra features
                            (not much use currently)
  PCRE_FIRSTLINE          Force matching to be before newline
  PCRE_JAVASCRIPT_COMPAT  JavaScript compatibility
  PCRE_MULTILINE          ^ and $ match newlines within data
  PCRE_NEWLINE_ANY        Recognize any Unicode newline sequence
  PCRE_NEWLINE_ANYCRLF    Recognize CR, LF, and CRLF as newline
                            sequences
  PCRE_NEWLINE_CR         Set CR as the newline sequence
  PCRE_NEWLINE_CRLF       Set CRLF as the newline sequence
  PCRE_NEWLINE_LF         Set LF as the newline sequence
  PCRE_NO_AUTO_CAPTURE    Disable numbered capturing paren-
                            theses (named ones available)
  PCRE_NO_UTF8_CHECK      Do not check the pattern for UTF-8
                            validity (only relevant if
                            PCRE_UTF8 is set)
  PCRE_UNGREEDY           Invert greediness of quantifiers
  PCRE_UTF8               Run in UTF-8 mode
.sp
PCRE must be built with UTF-8 support in order to use PCRE_UTF8 and
PCRE_NO_UTF8_CHECK.
.P
The yield of the function is a pointer to a private data structure that
contains the compiled pattern, or NULL if an error was detected. Note that
compiling regular expressions with one version of PCRE for use with a different
version is not guaranteed to work and may cause crashes.
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fR
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fR
.\"
page.
usr/share/man/man3/pcre_dfa_exec.3000064400000007551150403561450012736 0ustar00.TH PCRE_DFA_EXEC 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B int pcre_dfa_exec(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
.ti +5n
.B "const char *\fIsubject\fP," int \fIlength\fP, int \fIstartoffset\fP,
.ti +5n
.B int \fIoptions\fP, int *\fIovector\fP, int \fIovecsize\fP,
.ti +5n
.B int *\fIworkspace\fP, int \fIwscount\fP);
.
.SH DESCRIPTION
.rs
.sp
This function matches a compiled regular expression against a given subject
string, using an alternative matching algorithm that scans the subject string
just once (\fInot\fP Perl-compatible). Note that the main, Perl-compatible,
matching function is \fBpcre_exec()\fP. The arguments for this function are:
.sp
  \fIcode\fP         Points to the compiled pattern
  \fIextra\fP        Points to an associated \fBpcre_extra\fP structure,
                 or is NULL
  \fIsubject\fP      Points to the subject string
  \fIlength\fP       Length of the subject string, in bytes
  \fIstartoffset\fP  Offset in bytes in the subject at which to
                 start matching
  \fIoptions\fP      Option bits
  \fIovector\fP      Points to a vector of ints for result offsets
  \fIovecsize\fP     Number of elements in the vector
  \fIworkspace\fP    Points to a vector of ints used as working space
  \fIwscount\fP      Number of elements in the vector
.sp
The options are:
.sp
  PCRE_ANCHORED          Match only at the first position
  PCRE_BSR_ANYCRLF       \eR matches only CR, LF, or CRLF
  PCRE_BSR_UNICODE       \eR matches all Unicode line endings
  PCRE_NEWLINE_ANY       Recognize any Unicode newline sequence
  PCRE_NEWLINE_ANYCRLF   Recognize CR, LF, & CRLF as newline sequences
  PCRE_NEWLINE_CR        Recognize CR as the only newline sequence
  PCRE_NEWLINE_CRLF      Recognize CRLF as the only newline sequence
  PCRE_NEWLINE_LF        Recognize LF as the only newline sequence
  PCRE_NOTBOL            Subject is not the beginning of a line
  PCRE_NOTEOL            Subject is not the end of a line
  PCRE_NOTEMPTY          An empty string is not a valid match
  PCRE_NOTEMPTY_ATSTART  An empty string at the start of the subject
                           is not a valid match
  PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
  PCRE_NO_UTF8_CHECK     Do not check the subject for UTF-8
                           validity (only relevant if PCRE_UTF8
                           was set at compile time)
  PCRE_PARTIAL           ) Return PCRE_ERROR_PARTIAL for a partial
  PCRE_PARTIAL_SOFT      )   match if no full matches are found
  PCRE_PARTIAL_HARD      Return PCRE_ERROR_PARTIAL for a partial match
                           even if there is a full match as well
  PCRE_DFA_SHORTEST      Return only the shortest match
  PCRE_DFA_RESTART       Restart after a partial match
.sp
There are restrictions on what may appear in a pattern when using this matching
function. Details are given in the
.\" HREF
\fBpcrematching\fP
.\"
documentation. For details of partial matching, see the
.\" HREF
\fBpcrepartial\fP
.\"
page.
.P
A \fBpcre_extra\fP structure contains the following fields:
.sp
  \fIflags\fP        Bits indicating which fields are set
  \fIstudy_data\fP   Opaque data from \fBpcre_study()\fP
  \fImatch_limit\fP  Limit on internal resource use
  \fImatch_limit_recursion\fP  Limit on internal recursion depth
  \fIcallout_data\fP Opaque data passed back to callouts
  \fItables\fP       Points to character tables or is NULL
.sp
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, and
PCRE_EXTRA_TABLES. For this matching function, the \fImatch_limit\fP and
\fImatch_limit_recursion\fP fields are not used, and must not be set.
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fP
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fP
.\"
page.
usr/share/man/man3/pcre_get_stringnumber.3000064400000001733150403561450014552 0ustar00.TH PCRE_GET_STRINGNUMBER 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B int pcre_get_stringnumber(const pcre *\fIcode\fP,
.ti +5n
.B const char *\fIname\fP);
.
.SH DESCRIPTION
.rs
.sp
This convenience function finds the number of a named substring capturing
parenthesis in a compiled pattern. Its arguments are:
.sp
  \fIcode\fP    Compiled regular expression
  \fIname\fP    Name whose number is required
.sp
The yield of the function is the number of the parenthesis if the name is
found, or PCRE_ERROR_NOSUBSTRING otherwise. When duplicate names are allowed
(PCRE_DUPNAMES is set), it is not defined which of the numbers is returned by
\fBpcre_get_stringnumber()\fP. You can obtain the complete list by calling
\fBpcre_get_stringtable_entries()\fP.
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fP
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fP
.\"
page.
usr/share/man/man3/pcrecallout.3000064400000016751150403561450012506 0ustar00.TH PCRECALLOUT 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PCRE CALLOUTS"
.rs
.sp
.B int (*pcre_callout)(pcre_callout_block *);
.PP
PCRE provides a feature called "callout", which is a means of temporarily
passing control to the caller of PCRE in the middle of pattern matching. The
caller of PCRE provides an external function by putting its entry point in the
global variable \fIpcre_callout\fP. By default, this variable contains NULL,
which disables all calling out.
.P
Within a regular expression, (?C) indicates the points at which the external
function is to be called. Different callout points can be identified by putting
a number less than 256 after the letter C. The default value is zero.
For example, this pattern has two callout points:
.sp
  (?C1)abc(?C2)def
.sp
If the PCRE_AUTO_CALLOUT option bit is set when \fBpcre_compile()\fP or
\fBpcre_compile2()\fP is called, PCRE automatically inserts callouts, all with
number 255, before each item in the pattern. For example, if PCRE_AUTO_CALLOUT
is used with the pattern
.sp
  A(\ed{2}|--)
.sp
it is processed as if it were
.sp
(?C255)A(?C255)((?C255)\ed{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
.sp
Notice that there is a callout before and after each parenthesis and
alternation bar. Automatic callouts can be used for tracking the progress of
pattern matching. The
.\" HREF
\fBpcretest\fP
.\"
command has an option that sets automatic callouts; when it is used, the output
indicates how the pattern is matched. This is useful information when you are
trying to optimize the performance of a particular pattern.
.
.
.SH "MISSING CALLOUTS"
.rs
.sp
You should be aware that, because of optimizations in the way PCRE matches
patterns by default, callouts sometimes do not happen. For example, if the
pattern is
.sp
  ab(?C4)cd
.sp
PCRE knows that any matching string must contain the letter "d". If the subject
string is "abyz", the lack of "d" means that matching doesn't ever start, and
the callout is never reached. However, with "abyd", though the result is still
no match, the callout is obeyed.
.P
If the pattern is studied, PCRE knows the minimum length of a matching string,
and will immediately give a "no match" return without actually running a match
if the subject is not long enough, or, for unanchored patterns, if it has
been scanned far enough.
.P
You can disable these optimizations by passing the PCRE_NO_START_OPTIMIZE
option to \fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP. This slows down the
matching process, but does ensure that callouts such as the example above are
obeyed.
.
.
.SH "THE CALLOUT INTERFACE"
.rs
.sp
During matching, when PCRE reaches a callout point, the external function
defined by \fIpcre_callout\fP is called (if it is set). This applies to both
the \fBpcre_exec()\fP and the \fBpcre_dfa_exec()\fP matching functions. The
only argument to the callout function is a pointer to a \fBpcre_callout\fP
block. This structure contains the following fields:
.sp
  int          \fIversion\fP;
  int          \fIcallout_number\fP;
  int         *\fIoffset_vector\fP;
  const char  *\fIsubject\fP;
  int          \fIsubject_length\fP;
  int          \fIstart_match\fP;
  int          \fIcurrent_position\fP;
  int          \fIcapture_top\fP;
  int          \fIcapture_last\fP;
  void        *\fIcallout_data\fP;
  int          \fIpattern_position\fP;
  int          \fInext_item_length\fP;
.sp
The \fIversion\fP field is an integer containing the version number of the
block format. The initial version was 0; the current version is 1. The version
number will change again in future if additional fields are added, but the
intention is never to remove any of the existing fields.
.P
The \fIcallout_number\fP field contains the number of the callout, as compiled
into the pattern (that is, the number after ?C for manual callouts, and 255 for
automatically generated callouts).
.P
The \fIoffset_vector\fP field is a pointer to the vector of offsets that was
passed by the caller to \fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP. When
\fBpcre_exec()\fP is used, the contents can be inspected in order to extract
substrings that have been matched so far, in the same way as for extracting
substrings after a match has completed. For \fBpcre_dfa_exec()\fP this field is
not useful.
.P
The \fIsubject\fP and \fIsubject_length\fP fields contain copies of the values
that were passed to \fBpcre_exec()\fP.
.P
The \fIstart_match\fP field normally contains the offset within the subject at
which the current match attempt started. However, if the escape sequence \eK
has been encountered, this value is changed to reflect the modified starting
point. If the pattern is not anchored, the callout function may be called
several times from the same point in the pattern for different starting points
in the subject.
.P
The \fIcurrent_position\fP field contains the offset within the subject of the
current match pointer.
.P
When the \fBpcre_exec()\fP function is used, the \fIcapture_top\fP field
contains one more than the number of the highest numbered captured substring so
far. If no substrings have been captured, the value of \fIcapture_top\fP is
one. This is always the case when \fBpcre_dfa_exec()\fP is used, because it
does not support captured substrings.
.P
The \fIcapture_last\fP field contains the number of the most recently captured
substring. If no substrings have been captured, its value is -1. This is always
the case when \fBpcre_dfa_exec()\fP is used.
.P
The \fIcallout_data\fP field contains a value that is passed to
\fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP specifically so that it can be
passed back in callouts. It is passed in the \fIcallout_data\fP field of the
\fBpcre_extra\fP data structure. If no such data was passed, the value of
\fIcallout_data\fP in a \fBpcre_callout\fP block is NULL. There is a
description of the \fBpcre_extra\fP structure in the
.\" HREF
\fBpcreapi\fP
.\"
documentation.
.P
The \fIpattern_position\fP field is present from version 1 of the
\fIpcre_callout\fP structure. It contains the offset to the next item to be
matched in the pattern string.
.P
The \fInext_item_length\fP field is present from version 1 of the
\fIpcre_callout\fP structure. It contains the length of the next item to be
matched in the pattern string. When the callout immediately precedes an
alternation bar, a closing parenthesis, or the end of the pattern, the length
is zero. When the callout precedes an opening parenthesis, the length is that
of the entire subpattern.
.P
The \fIpattern_position\fP and \fInext_item_length\fP fields are intended to
help in distinguishing between different automatic callouts, which all have the
same callout number. However, they are set for all callouts.
.
.
.SH "RETURN VALUES"
.rs
.sp
The external callout function returns an integer to PCRE. If the value is zero,
matching proceeds as normal. If the value is greater than zero, matching fails
at the current point, but the testing of other matching possibilities goes
ahead, just as if a lookahead assertion had failed. If the value is less than
zero, the match is abandoned, and \fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP
returns the negative value.
.P
Negative values should normally be chosen from the set of PCRE_ERROR_xxx
values. In particular, PCRE_ERROR_NOMATCH forces a standard "no match" failure.
The error number PCRE_ERROR_CALLOUT is reserved for use by callout functions;
it will never be used by PCRE itself.
.
.
.SH AUTHOR
.rs
.sp
.nf
Philip Hazel
University Computing Service
Cambridge CB2 3QH, England.
.fi
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 29 September 2009
Copyright (c) 1997-2009 University of Cambridge.
.fi
usr/share/man/man3/pcre_get_named_substring.3000064400000002533150403561450015216 0ustar00.TH PCRE_GET_NAMED_SUBSTRING 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B int pcre_get_named_substring(const pcre *\fIcode\fP,
.ti +5n
.B const char *\fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, const char *\fIstringname\fP,
.ti +5n
.B const char **\fIstringptr\fP);
.
.SH DESCRIPTION
.rs
.sp
This is a convenience function for extracting a captured substring by name. The
arguments are:
.sp
  \fIcode\fP          Compiled pattern
  \fIsubject\fP       Subject that has been successfully matched
  \fIovector\fP       Offset vector that \fBpcre_exec()\fP used
  \fIstringcount\fP   Value returned by \fBpcre_exec()\fP
  \fIstringname\fP    Name of the required substring
  \fIstringptr\fP     Where to put the string pointer
.sp
The memory in which the substring is placed is obtained by calling
\fBpcre_malloc()\fP. The convenience function \fBpcre_free_substring()\fP can
be used to free it when it is no longer needed. The yield of the function is
the length of the extracted substring, PCRE_ERROR_NOMEMORY if sufficient memory
could not be obtained, or PCRE_ERROR_NOSUBSTRING if the string name is invalid.
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fP
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fP
.\"
page.
usr/share/man/man3/pcreprecompile.3000064400000012522150403561450013172 0ustar00.TH PCREPRECOMPILE 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "SAVING AND RE-USING PRECOMPILED PCRE PATTERNS"
.rs
.sp
If you are running an application that uses a large number of regular
expression patterns, it may be useful to store them in a precompiled form
instead of having to compile them every time the application is run.
If you are not using any private character tables (see the
.\" HREF
\fBpcre_maketables()\fP
.\"
documentation), this is relatively straightforward. If you are using private
tables, it is a little bit more complicated.
.P
If you save compiled patterns to a file, you can copy them to a different host
and run them there. This works even if the new host has the opposite endianness
to the one on which the patterns were compiled. There may be a small
performance penalty, but it should be insignificant. However, compiling regular
expressions with one version of PCRE for use with a different version is not
guaranteed to work and may cause crashes.
.
.
.SH "SAVING A COMPILED PATTERN"
.rs
.sp
The value returned by \fBpcre_compile()\fP points to a single block of memory
that holds the compiled pattern and associated data. You can find the length of
this block in bytes by calling \fBpcre_fullinfo()\fP with an argument of
PCRE_INFO_SIZE. You can then save the data in any appropriate manner. Here is
sample code that compiles a pattern and writes it to a file. It assumes that
the variable \fIfd\fP refers to a file that is open for output:
.sp
  int erroroffset, rc, size;
  const char *error;
  pcre *re;
.sp
  re = pcre_compile("my pattern", 0, &error, &erroroffset, NULL);
  if (re == NULL) { ... handle errors ... }
  rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size);
  if (rc < 0) { ... handle errors ... }
  rc = fwrite(re, 1, size, fd);
  if (rc != size) { ... handle errors ... }
.sp
In this example, the bytes that comprise the compiled pattern are copied
exactly. Note that this is binary data that may contain any of the 256 possible
byte values. On systems that make a distinction between binary and non-binary
data, be sure that the file is opened for binary output.
.P
If you want to write more than one pattern to a file, you will have to devise a
way of separating them. For binary data, preceding each pattern with its length
is probably the most straightforward approach. Another possibility is to write
out the data in hexadecimal instead of binary, one pattern to a line.
.P
Saving compiled patterns in a file is only one possible way of storing them for
later use. They could equally well be saved in a database, or in the memory of
some daemon process that passes them via sockets to the processes that want
them.
.P
If the pattern has been studied, it is also possible to save the study data in
a similar way to the compiled pattern itself. When studying generates
additional information, \fBpcre_study()\fP returns a pointer to a
\fBpcre_extra\fP data block. Its format is defined in the
.\" HTML <a href="pcreapi.html#extradata">
.\" </a>
section on matching a pattern
.\"
in the
.\" HREF
\fBpcreapi\fP
.\"
documentation. The \fIstudy_data\fP field points to the binary study data, and
this is what you must save (not the \fBpcre_extra\fP block itself). The length
of the study data can be obtained by calling \fBpcre_fullinfo()\fP with an
argument of PCRE_INFO_STUDYSIZE. Remember to check that \fBpcre_study()\fP did
return a non-NULL value before trying to save the study data.
.
.
.SH "RE-USING A PRECOMPILED PATTERN"
.rs
.sp
Re-using a precompiled pattern is straightforward. Having reloaded it into main
memory, you pass its pointer to \fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP in
the usual way. This should work even on another host, and even if that host has
the opposite endianness to the one where the pattern was compiled.
.P
However, if you passed a pointer to custom character tables when the pattern
was compiled (the \fItableptr\fP argument of \fBpcre_compile()\fP), you must
now pass a similar pointer to \fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP,
because the value saved with the compiled pattern will obviously be nonsense. A
field in a \fBpcre_extra\fP block is used to pass this data, as described in
the
.\" HTML <a href="pcreapi.html#extradata">
.\" </a>
section on matching a pattern
.\"
in the
.\" HREF
\fBpcreapi\fP
.\"
documentation.
.P
If you did not provide custom character tables when the pattern was compiled,
the pointer in the compiled pattern is NULL, which causes \fBpcre_exec()\fP to
use PCRE's internal tables. Thus, you do not need to take any special action at
run time in this case.
.P
If you saved study data with the compiled pattern, you need to create your own
\fBpcre_extra\fP data block and set the \fIstudy_data\fP field to point to the
reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in the
\fIflags\fP field to indicate that study data is present. Then pass the
\fBpcre_extra\fP block to \fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP in the
usual way.
.
.
.SH "COMPATIBILITY WITH DIFFERENT PCRE RELEASES"
.rs
.sp
In general, it is safest to recompile all saved patterns when you update to a
new PCRE release, though not all updates actually require this. Recompiling is
definitely needed for release 7.2.
.
.
.
.SH AUTHOR
.rs
.sp
.nf
Philip Hazel
University Computing Service
Cambridge CB2 3QH, England.
.fi
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 13 June 2007
Copyright (c) 1997-2007 University of Cambridge.
.fi
usr/share/man/man3/pcre_version.3000064400000000726150403561450012662 0ustar00.TH PCRE_VERSION 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B char *pcre_version(void);
.
.SH DESCRIPTION
.rs
.sp
This function returns a character string that gives the version number of the
PCRE library and the date of its release.
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fP
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fP
.\"
page.
usr/share/man/man3/pcreposix.3000064400000024100150403561450012170 0ustar00.TH PCREPOSIX 3
.SH NAME
PCRE - Perl-compatible regular expressions.
.SH "SYNOPSIS OF POSIX API"
.rs
.sp
.B #include <pcreposix.h>
.PP
.SM
.B int regcomp(regex_t *\fIpreg\fP, const char *\fIpattern\fP,
.ti +5n
.B int \fIcflags\fP);
.PP
.B int regexec(regex_t *\fIpreg\fP, const char *\fIstring\fP,
.ti +5n
.B size_t \fInmatch\fP, regmatch_t \fIpmatch\fP[], int \fIeflags\fP);
.PP
.B size_t regerror(int \fIerrcode\fP, const regex_t *\fIpreg\fP,
.ti +5n
.B char *\fIerrbuf\fP, size_t \fIerrbuf_size\fP);
.PP
.B void regfree(regex_t *\fIpreg\fP);
.
.SH DESCRIPTION
.rs
.sp
This set of functions provides a POSIX-style API to the PCRE regular expression
package. See the
.\" HREF
\fBpcreapi\fP
.\"
documentation for a description of PCRE's native API, which contains much
additional functionality.
.P
The functions described here are just wrapper functions that ultimately call
the PCRE native API. Their prototypes are defined in the \fBpcreposix.h\fP
header file, and on Unix systems the library itself is called
\fBpcreposix.a\fP, so can be accessed by adding \fB-lpcreposix\fP to the
command for linking an application that uses them. Because the POSIX functions
call the native ones, it is also necessary to add \fB-lpcre\fP.
.P
I have implemented only those POSIX option bits that can be reasonably mapped
to PCRE native options. In addition, the option REG_EXTENDED is defined with
the value zero. This has no effect, but since programs that are written to the
POSIX interface often use it, this makes it easier to slot in PCRE as a
replacement library. Other POSIX options are not even defined.
.P
There are also some other options that are not defined by POSIX. These have
been added at the request of users who want to make use of certain
PCRE-specific features via the POSIX calling interface.
.P
When PCRE is called via these functions, it is only the API that is POSIX-like
in style. The syntax and semantics of the regular expressions themselves are
still those of Perl, subject to the setting of various PCRE options, as
described below. "POSIX-like in style" means that the API approximates to the
POSIX definition; it is not fully POSIX-compatible, and in multi-byte encoding
domains it is probably even less compatible.
.P
The header for these functions is supplied as \fBpcreposix.h\fP to avoid any
potential clash with other POSIX libraries. It can, of course, be renamed or
aliased as \fBregex.h\fP, which is the "correct" name. It provides two
structure types, \fIregex_t\fP for compiled internal forms, and
\fIregmatch_t\fP for returning captured substrings. It also defines some
constants whose names start with "REG_"; these are used for setting options and
identifying error codes.
.P
.SH "COMPILING A PATTERN"
.rs
.sp
The function \fBregcomp()\fP is called to compile a pattern into an
internal form. The pattern is a C string terminated by a binary zero, and
is passed in the argument \fIpattern\fP. The \fIpreg\fP argument is a pointer
to a \fBregex_t\fP structure that is used as a base for storing information
about the compiled regular expression.
.P
The argument \fIcflags\fP is either zero, or contains one or more of the bits
defined by the following macros:
.sp
  REG_DOTALL
.sp
The PCRE_DOTALL option is set when the regular expression is passed for
compilation to the native function. Note that REG_DOTALL is not part of the
POSIX standard.
.sp
  REG_ICASE
.sp
The PCRE_CASELESS option is set when the regular expression is passed for
compilation to the native function.
.sp
  REG_NEWLINE
.sp
The PCRE_MULTILINE option is set when the regular expression is passed for
compilation to the native function. Note that this does \fInot\fP mimic the
defined POSIX behaviour for REG_NEWLINE (see the following section).
.sp
  REG_NOSUB
.sp
The PCRE_NO_AUTO_CAPTURE option is set when the regular expression is passed
for compilation to the native function. In addition, when a pattern that is
compiled with this flag is passed to \fBregexec()\fP for matching, the
\fInmatch\fP and \fIpmatch\fP arguments are ignored, and no captured strings
are returned.
.sp
  REG_UNGREEDY
.sp
The PCRE_UNGREEDY option is set when the regular expression is passed for
compilation to the native function. Note that REG_UNGREEDY is not part of the
POSIX standard.
.sp
  REG_UTF8
.sp
The PCRE_UTF8 option is set when the regular expression is passed for
compilation to the native function. This causes the pattern itself and all data
strings used for matching it to be treated as UTF-8 strings. Note that REG_UTF8
is not part of the POSIX standard.
.P
In the absence of these flags, no options are passed to the native function.
This means that the regex is compiled with PCRE default semantics. In
particular, the way it handles newline characters in the subject string is the
Perl way, not the POSIX way. Note that setting PCRE_MULTILINE has only
\fIsome\fP of the effects specified for REG_NEWLINE. It does not affect the way
newlines are matched by . (they are not) or by a negative class such as [^a]
(they are).
.P
The yield of \fBregcomp()\fP is zero on success, and non-zero otherwise. The
\fIpreg\fP structure is filled in on success, and one member of the structure
is public: \fIre_nsub\fP contains the number of capturing subpatterns in
the regular expression. Various error codes are defined in the header file.
.P
NOTE: If the yield of \fBregcomp()\fP is non-zero, you must not attempt to
use the contents of the \fIpreg\fP structure. If, for example, you pass it to
\fBregexec()\fP, the result is undefined and your program is likely to crash.
.
.
.SH "MATCHING NEWLINE CHARACTERS"
.rs
.sp
This area is not simple, because POSIX and Perl take different views of things.
It is not possible to get PCRE to obey POSIX semantics, but then PCRE was never
intended to be a POSIX engine. The following table lists the different
possibilities for matching newline characters in PCRE:
.sp
                          Default   Change with
.sp
  . matches newline          no     PCRE_DOTALL
  newline matches [^a]       yes    not changeable
  $ matches \en at end        yes    PCRE_DOLLAR_ENDONLY
  $ matches \en in middle     no     PCRE_MULTILINE
  ^ matches \en in middle     no     PCRE_MULTILINE
.sp
This is the equivalent table for POSIX:
.sp
                          Default   Change with
.sp
  . matches newline          yes    REG_NEWLINE
  newline matches [^a]       yes    REG_NEWLINE
  $ matches \en at end        no     REG_NEWLINE
  $ matches \en in middle     no     REG_NEWLINE
  ^ matches \en in middle     no     REG_NEWLINE
.sp
PCRE's behaviour is the same as Perl's, except that there is no equivalent for
PCRE_DOLLAR_ENDONLY in Perl. In both PCRE and Perl, there is no way to stop
newline from matching [^a].
.P
The default POSIX newline handling can be obtained by setting PCRE_DOTALL and
PCRE_DOLLAR_ENDONLY, but there is no way to make PCRE behave exactly as for the
REG_NEWLINE action.
.
.
.SH "MATCHING A PATTERN"
.rs
.sp
The function \fBregexec()\fP is called to match a compiled pattern \fIpreg\fP
against a given \fIstring\fP, which is by default terminated by a zero byte
(but see REG_STARTEND below), subject to the options in \fIeflags\fP. These can
be:
.sp
  REG_NOTBOL
.sp
The PCRE_NOTBOL option is set when calling the underlying PCRE matching
function.
.sp
  REG_NOTEMPTY
.sp
The PCRE_NOTEMPTY option is set when calling the underlying PCRE matching
function. Note that REG_NOTEMPTY is not part of the POSIX standard. However,
setting this option can give more POSIX-like behaviour in some situations.
.sp
  REG_NOTEOL
.sp
The PCRE_NOTEOL option is set when calling the underlying PCRE matching
function.
.sp
  REG_STARTEND
.sp
The string is considered to start at \fIstring\fP + \fIpmatch[0].rm_so\fP and
to have a terminating NUL located at \fIstring\fP + \fIpmatch[0].rm_eo\fP
(there need not actually be a NUL at that location), regardless of the value of
\fInmatch\fP. This is a BSD extension, compatible with but not specified by
IEEE Standard 1003.2 (POSIX.2), and should be used with caution in software
intended to be portable to other systems. Note that a non-zero \fIrm_so\fP does
not imply REG_NOTBOL; REG_STARTEND affects only the location of the string, not
how it is matched.
.P
If the pattern was compiled with the REG_NOSUB flag, no data about any matched
strings is returned. The \fInmatch\fP and \fIpmatch\fP arguments of
\fBregexec()\fP are ignored.
.P
If the value of \fInmatch\fP is zero, or if the value \fIpmatch\fP is NULL,
no data about any matched strings is returned.
.P
Otherwise, the portion of the string that was matched, and also any captured
substrings, are returned via the \fIpmatch\fP argument, which points to an
array of \fInmatch\fP structures of type \fIregmatch_t\fP, containing the
members \fIrm_so\fP and \fIrm_eo\fP. These contain the offset to the first
character of each substring and the offset to the first character after the end
of each substring, respectively. The 0th element of the vector relates to the
entire portion of \fIstring\fP that was matched; subsequent elements relate to
the capturing subpatterns of the regular expression. Unused entries in the
array have both structure members set to -1.
.P
A successful match yields a zero return; various error codes are defined in the
header file, of which REG_NOMATCH is the "expected" failure code.
.
.
.SH "ERROR MESSAGES"
.rs
.sp
The \fBregerror()\fP function maps a non-zero errorcode from either
\fBregcomp()\fP or \fBregexec()\fP to a printable message. If \fIpreg\fP is not
NULL, the error should have arisen from the use of that structure. A message
terminated by a binary zero is placed in \fIerrbuf\fP. The length of the
message, including the zero, is limited to \fIerrbuf_size\fP. The yield of the
function is the size of buffer needed to hold the whole message.
.
.
.SH MEMORY USAGE
.rs
.sp
Compiling a regular expression causes memory to be allocated and associated
with the \fIpreg\fP structure. The function \fBregfree()\fP frees all such
memory, after which \fIpreg\fP may no longer be used as a compiled expression.
.
.
.SH AUTHOR
.rs
.sp
.nf
Philip Hazel
University Computing Service
Cambridge CB2 3QH, England.
.fi
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 02 September 2009
Copyright (c) 1997-2009 University of Cambridge.
.fi
usr/share/man/man3/pcrecpp.3000064400000030410150403561450011611 0ustar00.TH PCRECPP 3
.SH NAME
PCRE - Perl-compatible regular expressions.
.SH "SYNOPSIS OF C++ WRAPPER"
.rs
.sp
.B #include <pcrecpp.h>
.
.SH DESCRIPTION
.rs
.sp
The C++ wrapper for PCRE was provided by Google Inc. Some additional
functionality was added by Giuseppe Maxia. This brief man page was constructed
from the notes in the \fIpcrecpp.h\fP file, which should be consulted for
further details.
.
.
.SH "MATCHING INTERFACE"
.rs
.sp
The "FullMatch" operation checks that supplied text matches a supplied pattern
exactly. If pointer arguments are supplied, it copies matched sub-strings that
match sub-patterns into them.
.sp
  Example: successful match
     pcrecpp::RE re("h.*o");
     re.FullMatch("hello");
.sp
  Example: unsuccessful match (requires full match):
     pcrecpp::RE re("e");
     !re.FullMatch("hello");
.sp
  Example: creating a temporary RE object:
     pcrecpp::RE("h.*o").FullMatch("hello");
.sp
You can pass in a "const char*" or a "string" for "text". The examples below
tend to use a const char*. You can, as in the different examples above, store
the RE object explicitly in a variable or use a temporary RE object. The
examples below use one mode or the other arbitrarily. Either could correctly be
used for any of these examples.
.P
You must supply extra pointer arguments to extract matched subpieces.
.sp
  Example: extracts "ruby" into "s" and 1234 into "i"
     int i;
     string s;
     pcrecpp::RE re("(\e\ew+):(\e\ed+)");
     re.FullMatch("ruby:1234", &s, &i);
.sp
  Example: does not try to extract any extra sub-patterns
     re.FullMatch("ruby:1234", &s);
.sp
  Example: does not try to extract into NULL
     re.FullMatch("ruby:1234", NULL, &i);
.sp
  Example: integer overflow causes failure
     !re.FullMatch("ruby:1234567891234", NULL, &i);
.sp
  Example: fails because there aren't enough sub-patterns:
     !pcrecpp::RE("\e\ew+:\e\ed+").FullMatch("ruby:1234", &s);
.sp
  Example: fails because string cannot be stored in integer
     !pcrecpp::RE("(.*)").FullMatch("ruby", &i);
.sp
The provided pointer arguments can be pointers to any scalar numeric
type, or one of:
.sp
   string        (matched piece is copied to string)
   StringPiece   (StringPiece is mutated to point to matched piece)
   T             (where "bool T::ParseFrom(const char*, int)" exists)
   NULL          (the corresponding matched sub-pattern is not copied)
.sp
The function returns true iff all of the following conditions are satisfied:
.sp
  a. "text" matches "pattern" exactly;
.sp
  b. The number of matched sub-patterns is >= number of supplied
     pointers;
.sp
  c. The "i"th argument has a suitable type for holding the
     string captured as the "i"th sub-pattern. If you pass in
     void * NULL for the "i"th argument, or a non-void * NULL
     of the correct type, or pass fewer arguments than the
     number of sub-patterns, "i"th captured sub-pattern is
     ignored.
.sp
CAVEAT: An optional sub-pattern that does not exist in the matched
string is assigned the empty string. Therefore, the following will
return false (because the empty string is not a valid number):
.sp
   int number;
   pcrecpp::RE("[a-z]+(\e\ed+)?").FullMatch("abc", &number);
.sp
The matching interface supports at most 16 arguments per call.
If you need more, consider using the more general interface
\fBpcrecpp::RE::DoMatch\fP. See \fBpcrecpp.h\fP for the signature for
\fBDoMatch\fP.
.P
NOTE: Do not use \fBno_arg\fP, which is used internally to mark the end of a
list of optional arguments, as a placeholder for missing arguments, as this can
lead to segfaults.
.
.
.SH "QUOTING METACHARACTERS"
.rs
.sp
You can use the "QuoteMeta" operation to insert backslashes before all
potentially meaningful characters in a string. The returned string, used as a
regular expression, will exactly match the original string.
.sp
  Example:
     string quoted = RE::QuoteMeta(unquoted);
.sp
Note that it's legal to escape a character even if it has no special meaning in
a regular expression -- so this function does that. (This also makes it
identical to the perl function of the same name; see "perldoc -f quotemeta".)
For example, "1.5-2.0?" becomes "1\e.5\e-2\e.0\e?".
.
.SH "PARTIAL MATCHES"
.rs
.sp
You can use the "PartialMatch" operation when you want the pattern
to match any substring of the text.
.sp
  Example: simple search for a string:
     pcrecpp::RE("ell").PartialMatch("hello");
.sp
  Example: find first number in a string:
     int number;
     pcrecpp::RE re("(\e\ed+)");
     re.PartialMatch("x*100 + 20", &number);
     assert(number == 100);
.
.
.SH "UTF-8 AND THE MATCHING INTERFACE"
.rs
.sp
By default, pattern and text are plain text, one byte per character. The UTF8
flag, passed to the constructor, causes both pattern and string to be treated
as UTF-8 text, still a byte stream but potentially multiple bytes per
character. In practice, the text is likelier to be UTF-8 than the pattern, but
the match returned may depend on the UTF8 flag, so always use it when matching
UTF8 text. For example, "." will match one byte normally but with UTF8 set may
match up to three bytes of a multi-byte character.
.sp
  Example:
     pcrecpp::RE_Options options;
     options.set_utf8();
     pcrecpp::RE re(utf8_pattern, options);
     re.FullMatch(utf8_string);
.sp
  Example: using the convenience function UTF8():
     pcrecpp::RE re(utf8_pattern, pcrecpp::UTF8());
     re.FullMatch(utf8_string);
.sp
NOTE: The UTF8 flag is ignored if pcre was not configured with the
      --enable-utf8 flag.
.
.
.SH "PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE"
.rs
.sp
PCRE defines some modifiers to change the behavior of the regular expression
engine. The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle to
pass such modifiers to a RE class. Currently, the following modifiers are
supported:
.sp
   modifier              description               Perl corresponding
.sp
   PCRE_CASELESS         case insensitive match      /i
   PCRE_MULTILINE        multiple lines match        /m
   PCRE_DOTALL           dot matches newlines        /s
   PCRE_DOLLAR_ENDONLY   $ matches only at end       N/A
   PCRE_EXTRA            strict escape parsing       N/A
   PCRE_EXTENDED         ignore whitespaces          /x
   PCRE_UTF8             handles UTF8 chars          built-in
   PCRE_UNGREEDY         reverses * and *?           N/A
   PCRE_NO_AUTO_CAPTURE  disables capturing parens   N/A (*)
.sp
(*) Both Perl and PCRE allow non capturing parentheses by means of the
"?:" modifier within the pattern itself. e.g. (?:ab|cd) does not
capture, while (ab|cd) does.
.P
For a full account on how each modifier works, please check the
PCRE API reference page.
.P
For each modifier, there are two member functions whose name is made
out of the modifier in lowercase, without the "PCRE_" prefix. For
instance, PCRE_CASELESS is handled by
.sp
  bool caseless()
.sp
which returns true if the modifier is set, and
.sp
  RE_Options & set_caseless(bool)
.sp
which sets or unsets the modifier. Moreover, PCRE_EXTRA_MATCH_LIMIT can be
accessed through the \fBset_match_limit()\fR and \fBmatch_limit()\fR member
functions. Setting \fImatch_limit\fR to a non-zero value will limit the
execution of pcre to keep it from doing bad things like blowing the stack or
taking an eternity to return a result. A value of 5000 is good enough to stop
stack blowup in a 2MB thread stack. Setting \fImatch_limit\fR to zero disables
match limiting. Alternatively, you can call \fBmatch_limit_recursion()\fP
which uses PCRE_EXTRA_MATCH_LIMIT_RECURSION to limit how much PCRE
recurses. \fBmatch_limit()\fP limits the number of matches PCRE does;
\fBmatch_limit_recursion()\fP limits the depth of internal recursion, and
therefore the amount of stack that is used.
.P
Normally, to pass one or more modifiers to a RE class, you declare
a \fIRE_Options\fR object, set the appropriate options, and pass this
object to a RE constructor. Example:
.sp
   RE_Options opt;
   opt.set_caseless(true);
   if (RE("HELLO", opt).PartialMatch("hello world")) ...
.sp
RE_Options has two constructors. The default constructor takes no arguments and
creates a set of flags that are off by default. The optional parameter
\fIoption_flags\fR is to facilitate transfer of legacy code from C programs.
This lets you do
.sp
   RE(pattern,
     RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
.sp
However, new code is better off doing
.sp
   RE(pattern,
     RE_Options().set_caseless(true).set_multiline(true))
       .PartialMatch(str);
.sp
If you are going to pass one of the most used modifiers, there are some
convenience functions that return a RE_Options class with the
appropriate modifier already set: \fBCASELESS()\fR, \fBUTF8()\fR,
\fBMULTILINE()\fR, \fBDOTALL()\fR, and \fBEXTENDED()\fR.
.P
If you need to set several options at once, and you don't want to go through
the pains of declaring a RE_Options object and setting several options, there
is a parallel method that gives you such ability on the fly. You can concatenate
several \fBset_xxxxx()\fR member functions, since each of them returns a
reference to its class object. For example, to pass PCRE_CASELESS,
PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one statement, you may write:
.sp
   RE(" ^ xyz \e\es+ .* blah$",
     RE_Options()
       .set_caseless(true)
       .set_extended(true)
       .set_multiline(true)).PartialMatch(sometext);
.sp
.
.
.SH "SCANNING TEXT INCREMENTALLY"
.rs
.sp
The "Consume" operation may be useful if you want to repeatedly
match regular expressions at the front of a string and skip over
them as they match. This requires use of the "StringPiece" type,
which represents a sub-range of a real string. Like RE, StringPiece
is defined in the pcrecpp namespace.
.sp
  Example: read lines of the form "var = value" from a string.
     string contents = ...;                 // Fill string somehow
     pcrecpp::StringPiece input(contents);  // Wrap in a StringPiece

     string var;
     int value;
     pcrecpp::RE re("(\e\ew+) = (\e\ed+)\en");
     while (re.Consume(&input, &var, &value)) {
       ...;
     }
.sp
Each successful call to "Consume" will set "var/value", and also
advance "input" so it points past the matched text.
.P
The "FindAndConsume" operation is similar to "Consume" but does not
anchor your match at the beginning of the string. For example, you
could extract all words from a string by repeatedly calling
.sp
  pcrecpp::RE("(\e\ew+)").FindAndConsume(&input, &word)
.
.
.SH "PARSING HEX/OCTAL/C-RADIX NUMBERS"
.rs
.sp
By default, if you pass a pointer to a numeric value, the
corresponding text is interpreted as a base-10 number. You can
instead wrap the pointer with a call to one of the operators Hex(),
Octal(), or CRadix() to interpret the text in another base. The
CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
prefixes, but defaults to base-10.
.sp
  Example:
    int a, b, c, d;
    pcrecpp::RE re("(.*) (.*) (.*) (.*)");
    re.FullMatch("100 40 0100 0x40",
                 pcrecpp::Octal(&a), pcrecpp::Hex(&b),
                 pcrecpp::CRadix(&c), pcrecpp::CRadix(&d));
.sp
will leave 64 in a, b, c, and d.
.
.
.SH "REPLACING PARTS OF STRINGS"
.rs
.sp
You can replace the first match of "pattern" in "str" with "rewrite".
Within "rewrite", backslash-escaped digits (\e1 to \e9) can be
used to insert text matching corresponding parenthesized group
from the pattern. \e0 in "rewrite" refers to the entire matching
text. For example:
.sp
  string s = "yabba dabba doo";
  pcrecpp::RE("b+").Replace("d", &s);
.sp
will leave "s" containing "yada dabba doo". The result is true if the pattern
matches and a replacement occurs, false otherwise.
.P
\fBGlobalReplace\fP is like \fBReplace\fP except that it replaces all
occurrences of the pattern in the string with the rewrite. Replacements are
not subject to re-matching. For example:
.sp
  string s = "yabba dabba doo";
  pcrecpp::RE("b+").GlobalReplace("d", &s);
.sp
will leave "s" containing "yada dada doo". It returns the number of
replacements made.
.P
\fBExtract\fP is like \fBReplace\fP, except that if the pattern matches,
"rewrite" is copied into "out" (an additional argument) with substitutions.
The non-matching portions of "text" are ignored. Returns true iff a match
occurred and the extraction happened successfully;  if no match occurs, the
string is left unaffected.
.
.
.SH AUTHOR
.rs
.sp
.nf
The C++ wrapper was contributed by Google Inc.
Copyright (c) 2007 Google Inc.
.fi
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 17 March 2009
.fi
usr/share/man/man3/pcre_info.3000064400000000745150403561450012131 0ustar00.TH PCRE_INFO 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B int pcre_info(const pcre *\fIcode\fP, int *\fIoptptr\fP, int
.B *\fIfirstcharptr\fP);
.
.SH DESCRIPTION
.rs
.sp
This function is obsolete. You should be using \fBpcre_fullinfo()\fP instead.
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fP
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fP
.\"
page.
usr/share/man/man3/pcrepartial.3000064400000037636150403561450012504 0ustar00.TH PCREPARTIAL 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PARTIAL MATCHING IN PCRE"
.rs
.sp
In normal use of PCRE, if the subject string that is passed to
\fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP matches as far as it goes, but is
too short to match the entire pattern, PCRE_ERROR_NOMATCH is returned. There
are circumstances where it might be helpful to distinguish this case from other
cases in which there is no match.
.P
Consider, for example, an application where a human is required to type in data
for a field with specific formatting requirements. An example might be a date
in the form \fIddmmmyy\fP, defined by this pattern:
.sp
  ^\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed$
.sp
If the application sees the user's keystrokes one by one, and can check that
what has been typed so far is potentially valid, it is able to raise an error
as soon as a mistake is made, by beeping and not reflecting the character that
has been typed, for example. This immediate feedback is likely to be a better
user interface than a check that is delayed until the entire string has been
entered. Partial matching can also sometimes be useful when the subject string
is very long and is not all available at once.
.P
PCRE supports partial matching by means of the PCRE_PARTIAL_SOFT and
PCRE_PARTIAL_HARD options, which can be set when calling \fBpcre_exec()\fP or
\fBpcre_dfa_exec()\fP. For backwards compatibility, PCRE_PARTIAL is a synonym
for PCRE_PARTIAL_SOFT. The essential difference between the two options is
whether or not a partial match is preferred to an alternative complete match,
though the details differ between the two matching functions. If both options
are set, PCRE_PARTIAL_HARD takes precedence.
.P
Setting a partial matching option disables two of PCRE's optimizations. PCRE
remembers the last literal byte in a pattern, and abandons matching immediately
if such a byte is not present in the subject string. This optimization cannot
be used for a subject string that might match only partially. If the pattern
was studied, PCRE knows the minimum length of a matching string, and does not
bother to run the matching function on shorter strings. This optimization is
also disabled for partial matching.
.
.
.SH "PARTIAL MATCHING USING pcre_exec()"
.rs
.sp
A partial match occurs during a call to \fBpcre_exec()\fP whenever the end of
the subject string is reached successfully, but matching cannot continue
because more characters are needed. However, at least one character must have
been matched. (In other words, a partial match can never be an empty string.)
.P
If PCRE_PARTIAL_SOFT is set, the partial match is remembered, but matching
continues as normal, and other alternatives in the pattern are tried. If no
complete match can be found, \fBpcre_exec()\fP returns PCRE_ERROR_PARTIAL
instead of PCRE_ERROR_NOMATCH. If there are at least two slots in the offsets
vector, the first of them is set to the offset of the earliest character that
was inspected when the partial match was found. For convenience, the second
offset points to the end of the string so that a substring can easily be
identified.
.P
For the majority of patterns, the first offset identifies the start of the
partially matched string. However, for patterns that contain lookbehind
assertions, or \eK, or begin with \eb or \eB, earlier characters have been
inspected while carrying out the match. For example:
.sp
  /(?<=abc)123/
.sp
This pattern matches "123", but only if it is preceded by "abc". If the subject
string is "xyzabc12", the offsets after a partial match are for the substring
"abc12", because all these characters are needed if another match is tried
with extra characters added.
.P
If there is more than one partial match, the first one that was found provides
the data that is returned. Consider this pattern:
.sp
  /123\ew+X|dogY/
.sp
If this is matched against the subject string "abc123dog", both
alternatives fail to match, but the end of the subject is reached during
matching, so PCRE_ERROR_PARTIAL is returned instead of PCRE_ERROR_NOMATCH. The
offsets are set to 3 and 9, identifying "123dog" as the first partial match
that was found. (In this example, there are two partial matches, because "dog"
on its own partially matches the second alternative.)
.P
If PCRE_PARTIAL_HARD is set for \fBpcre_exec()\fP, it returns
PCRE_ERROR_PARTIAL as soon as a partial match is found, without continuing to
search for possible complete matches. The difference between the two options
can be illustrated by a pattern such as:
.sp
  /dog(sbody)?/
.sp
This matches either "dog" or "dogsbody", greedily (that is, it prefers the
longer string if possible). If it is matched against the string "dog" with
PCRE_PARTIAL_SOFT, it yields a complete match for "dog". However, if
PCRE_PARTIAL_HARD is set, the result is PCRE_ERROR_PARTIAL. On the other hand,
if the pattern is made ungreedy the result is different:
.sp
  /dog(sbody)??/
.sp
In this case the result is always a complete match because \fBpcre_exec()\fP
finds that first, and it never continues after finding a match. It might be
easier to follow this explanation by thinking of the two patterns like this:
.sp
  /dog(sbody)?/    is the same as  /dogsbody|dog/
  /dog(sbody)??/   is the same as  /dog|dogsbody/
.sp
The second pattern will never match "dogsbody" when \fBpcre_exec()\fP is
used, because it will always find the shorter match first.
.
.
.SH "PARTIAL MATCHING USING pcre_dfa_exec()"
.rs
.sp
The \fBpcre_dfa_exec()\fP function moves along the subject string character by
character, without backtracking, searching for all possible matches
simultaneously. If the end of the subject is reached before the end of the
pattern, there is the possibility of a partial match, again provided that at
least one character has matched.
.P
When PCRE_PARTIAL_SOFT is set, PCRE_ERROR_PARTIAL is returned only if there
have been no complete matches. Otherwise, the complete matches are returned.
However, if PCRE_PARTIAL_HARD is set, a partial match takes precedence over any
complete matches. The portion of the string that was inspected when the longest
partial match was found is set as the first matching string, provided there are
at least two slots in the offsets vector.
.P
Because \fBpcre_dfa_exec()\fP always searches for all possible matches, and
there is no difference between greedy and ungreedy repetition, its behaviour is
different from \fBpcre_exec\fP when PCRE_PARTIAL_HARD is set. Consider the
string "dog" matched against the ungreedy pattern shown above:
.sp
  /dog(sbody)??/
.sp
Whereas \fBpcre_exec()\fP stops as soon as it finds the complete match for
"dog", \fBpcre_dfa_exec()\fP also finds the partial match for "dogsbody", and
so returns that when PCRE_PARTIAL_HARD is set.
.
.
.SH "PARTIAL MATCHING AND WORD BOUNDARIES"
.rs
.sp
If a pattern ends with one of the sequences \eb or \eB, which test for word
boundaries, partial matching with PCRE_PARTIAL_SOFT can give counter-intuitive
results. Consider this pattern:
.sp
  /\ebcat\eb/
.sp
This matches "cat", provided there is a word boundary at either end. If the
subject string is "the cat", the comparison of the final "t" with a following
character cannot take place, so a partial match is found. However,
\fBpcre_exec()\fP carries on with normal matching, which matches \eb at the end
of the subject when the last character is a letter, thus finding a complete
match. The result, therefore, is \fInot\fP PCRE_ERROR_PARTIAL. The same thing
happens with \fBpcre_dfa_exec()\fP, because it also finds the complete match.
.P
Using PCRE_PARTIAL_HARD in this case does yield PCRE_ERROR_PARTIAL, because
then the partial match takes precedence.
.
.
.SH "FORMERLY RESTRICTED PATTERNS"
.rs
.sp
For releases of PCRE prior to 8.00, because of the way certain internal
optimizations were implemented in the \fBpcre_exec()\fP function, the
PCRE_PARTIAL option (predecessor of PCRE_PARTIAL_SOFT) could not be used with
all patterns. From release 8.00 onwards, the restrictions no longer apply, and
partial matching with \fBpcre_exec()\fP can be requested for any pattern.
.P
Items that were formerly restricted were repeated single characters and
repeated metasequences. If PCRE_PARTIAL was set for a pattern that did not
conform to the restrictions, \fBpcre_exec()\fP returned the error code
PCRE_ERROR_BADPARTIAL (-13). This error code is no longer in use. The
PCRE_INFO_OKPARTIAL call to \fBpcre_fullinfo()\fP to find out if a compiled
pattern can be used for partial matching now always returns 1.
.
.
.SH "EXAMPLE OF PARTIAL MATCHING USING PCRETEST"
.rs
.sp
If the escape sequence \eP is present in a \fBpcretest\fP data line, the
PCRE_PARTIAL_SOFT option is used for the match. Here is a run of \fBpcretest\fP
that uses the date example quoted above:
.sp
    re> /^\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed$/
  data> 25jun04\eP
   0: 25jun04
   1: jun
  data> 25dec3\eP
  Partial match: 25dec3
  data> 3ju\eP
  Partial match: 3ju
  data> 3juj\eP
  No match
  data> j\eP
  No match
.sp
The first data string is matched completely, so \fBpcretest\fP shows the
matched substrings. The remaining four strings do not match the complete
pattern, but the first two are partial matches. Similar output is obtained
when \fBpcre_dfa_exec()\fP is used.
.P
If the escape sequence \eP is present more than once in a \fBpcretest\fP data
line, the PCRE_PARTIAL_HARD option is set for the match.
.
.
.SH "MULTI-SEGMENT MATCHING WITH pcre_dfa_exec()"
.rs
.sp
When a partial match has been found using \fBpcre_dfa_exec()\fP, it is possible
to continue the match by providing additional subject data and calling
\fBpcre_dfa_exec()\fP again with the same compiled regular expression, this
time setting the PCRE_DFA_RESTART option. You must pass the same working
space as before, because this is where details of the previous partial match
are stored. Here is an example using \fBpcretest\fP, using the \eR escape
sequence to set the PCRE_DFA_RESTART option (\eD specifies the use of
\fBpcre_dfa_exec()\fP):
.sp
    re> /^\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed$/
  data> 23ja\eP\eD
  Partial match: 23ja
  data> n05\eR\eD
   0: n05
.sp
The first call has "23ja" as the subject, and requests partial matching; the
second call has "n05" as the subject for the continued (restarted) match.
Notice that when the match is complete, only the last part is shown; PCRE does
not retain the previously partially-matched string. It is up to the calling
program to do that if it needs to.
.P
You can set the PCRE_PARTIAL_SOFT or PCRE_PARTIAL_HARD options with
PCRE_DFA_RESTART to continue partial matching over multiple segments. This
facility can be used to pass very long subject strings to
\fBpcre_dfa_exec()\fP.
.
.
.SH "MULTI-SEGMENT MATCHING WITH pcre_exec()"
.rs
.sp
From release 8.00, \fBpcre_exec()\fP can also be used to do multi-segment
matching. Unlike \fBpcre_dfa_exec()\fP, it is not possible to restart the
previous match with a new segment of data. Instead, new data must be added to
the previous subject string, and the entire match re-run, starting from the
point where the partial match occurred. Earlier data can be discarded.
Consider an unanchored pattern that matches dates:
.sp
    re> /\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed/
  data> The date is 23ja\eP
  Partial match: 23ja
.sp
At this stage, an application could discard the text preceding "23ja", add on
text from the next segment, and call \fBpcre_exec()\fP again. Unlike
\fBpcre_dfa_exec()\fP, the entire matching string must always be available, and
the complete matching process occurs for each call, so more memory and more
processing time is needed.
.P
\fBNote:\fP If the pattern contains lookbehind assertions, or \eK, or starts
with \eb or \eB, the string that is returned for a partial match will include
characters that precede the partially matched string itself, because these must
be retained when adding on more characters for a subsequent matching attempt.
.
.
.SH "ISSUES WITH MULTI-SEGMENT MATCHING"
.rs
.sp
Certain types of pattern may give problems with multi-segment matching,
whichever matching function is used.
.P
1. If the pattern contains tests for the beginning or end of a line, you need
to pass the PCRE_NOTBOL or PCRE_NOTEOL options, as appropriate, when the
subject string for any call does not contain the beginning or end of a line.
.P
2. Lookbehind assertions at the start of a pattern are catered for in the
offsets that are returned for a partial match. However, in theory, a lookbehind
assertion later in the pattern could require even earlier characters to be
inspected, and it might not have been reached when a partial match occurs. This
is probably an extremely unlikely case; you could guard against it to a certain
extent by always including extra characters at the start.
.P
3. Matching a subject string that is split into multiple segments may not
always produce exactly the same result as matching over one single long string,
especially when PCRE_PARTIAL_SOFT is used. The section "Partial Matching and
Word Boundaries" above describes an issue that arises if the pattern ends with
\eb or \eB. Another kind of difference may occur when there are multiple
matching possibilities, because a partial match result is given only when there
are no completed matches. This means that as soon as the shortest match has
been found, continuation to a new subject segment is no longer possible.
Consider again this \fBpcretest\fP example:
.sp
    re> /dog(sbody)?/
  data> dogsb\eP
   0: dog
  data> do\eP\eD
  Partial match: do
  data> gsb\eR\eP\eD
   0: g
  data> dogsbody\eD
   0: dogsbody
   1: dog
.sp
The first data line passes the string "dogsb" to \fBpcre_exec()\fP, setting the
PCRE_PARTIAL_SOFT option. Although the string is a partial match for
"dogsbody", the result is not PCRE_ERROR_PARTIAL, because the shorter string
"dog" is a complete match. Similarly, when the subject is presented to
\fBpcre_dfa_exec()\fP in several parts ("do" and "gsb" being the first two) the
match stops when "dog" has been found, and it is not possible to continue. On
the other hand, if "dogsbody" is presented as a single string,
\fBpcre_dfa_exec()\fP finds both matches.
.P
Because of these problems, it is probably best to use PCRE_PARTIAL_HARD when
matching multi-segment data. The example above then behaves differently:
.sp
    re> /dog(sbody)?/
  data> dogsb\eP\eP
  Partial match: dogsb
  data> do\eP\eD
  Partial match: do
  data> gsb\eR\eP\eP\eD
  Partial match: gsb
.sp
.P
4. Patterns that contain alternatives at the top level which do not all
start with the same pattern item may not work as expected when
PCRE_DFA_RESTART is used with \fBpcre_dfa_exec()\fP. For example, consider this
pattern:
.sp
  1234|3789
.sp
If the first part of the subject is "ABC123", a partial match of the first
alternative is found at offset 3. There is no partial match for the second
alternative, because such a match does not start at the same point in the
subject string. Attempting to continue with the string "7890" does not yield a
match because only those alternatives that match at one point in the subject
are remembered. The problem arises because the start of the second alternative
matches within the first alternative. There is no problem with anchored
patterns or patterns such as:
.sp
  1234|ABCD
.sp
where no string can be a partial match for both alternatives. This is not a
problem if \fBpcre_exec()\fP is used, because the entire match has to be rerun
each time:
.sp
    re> /1234|3789/
  data> ABC123\eP
  Partial match: 123
  data> 1237890
   0: 3789
.sp
Of course, instead of using PCRE_DFA_RESTART, the same technique of re-running
the entire match can also be used with \fBpcre_dfa_exec()\fP. Another
possibility is to work with two buffers. If a partial match at offset \fIn\fP
in the first buffer is followed by "no match" when PCRE_DFA_RESTART is used on
the second buffer, you can then try a new match starting at offset \fIn+1\fP in
the first buffer.
.
.
.SH AUTHOR
.rs
.sp
.nf
Philip Hazel
University Computing Service
Cambridge CB2 3QH, England.
.fi
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 19 October 2009
Copyright (c) 1997-2009 University of Cambridge.
.fi
usr/share/man/man3/pcrebuild.3000064400000031114150403561450012130 0ustar00.TH PCREBUILD 3
.SH NAME
PCRE - Perl-compatible regular expressions
.
.
.SH "PCRE BUILD-TIME OPTIONS"
.rs
.sp
This document describes the optional features of PCRE that can be selected when
the library is compiled. It assumes use of the \fBconfigure\fP script, where
the optional features are selected or deselected by providing options to
\fBconfigure\fP before running the \fBmake\fP command. However, the same
options can be selected in both Unix-like and non-Unix-like environments using
the GUI facility of \fBcmake-gui\fP if you are using \fBCMake\fP instead of
\fBconfigure\fP to build PCRE.
.P
There is a lot more information about building PCRE in non-Unix-like
environments in the file called \fINON_UNIX_USE\fP, which is part of the PCRE
distribution. You should consult this file as well as the \fIREADME\fP file if
you are building in a non-Unix-like environment.
.P
The complete list of options for \fBconfigure\fP (which includes the standard
ones such as the selection of the installation directory) can be obtained by
running
.sp
  ./configure --help
.sp
The following sections include descriptions of options whose names begin with
--enable or --disable. These settings specify changes to the defaults for the
\fBconfigure\fP command. Because of the way that \fBconfigure\fP works,
--enable and --disable always come in pairs, so the complementary option always
exists as well, but as it specifies the default, it is not described.
.
.
.SH "C++ SUPPORT"
.rs
.sp
By default, the \fBconfigure\fP script will search for a C++ compiler and C++
header files. If it finds them, it automatically builds the C++ wrapper library
for PCRE. You can disable this by adding
.sp
  --disable-cpp
.sp
to the \fBconfigure\fP command.
.
.
.SH "UTF-8 SUPPORT"
.rs
.sp
To build PCRE with support for UTF-8 Unicode character strings, add
.sp
  --enable-utf8
.sp
to the \fBconfigure\fP command. Of itself, this does not make PCRE treat
strings as UTF-8. As well as compiling PCRE with this option, you also have
to set the PCRE_UTF8 option when you call the \fBpcre_compile()\fP
or \fBpcre_compile2()\fP functions.
.P
If you set --enable-utf8 when compiling in an EBCDIC environment, PCRE expects
its input to be either ASCII or UTF-8 (depending on the runtime option). It is
not possible to support both EBCDIC and UTF-8 codes in the same version of the
library. Consequently, --enable-utf8 and --enable-ebcdic are mutually
exclusive.
.
.
.SH "UNICODE CHARACTER PROPERTY SUPPORT"
.rs
.sp
UTF-8 support allows PCRE to process character values greater than 255 in the
strings that it handles. On its own, however, it does not provide any
facilities for accessing the properties of such characters. If you want to be
able to use the pattern escapes \eP, \ep, and \eX, which refer to Unicode
character properties, you must add
.sp
  --enable-unicode-properties
.sp
to the \fBconfigure\fP command. This implies UTF-8 support, even if you have
not explicitly requested it.
.P
Including Unicode property support adds around 30K of tables to the PCRE
library. Only the general category properties such as \fILu\fP and \fINd\fP are
supported. Details are given in the
.\" HREF
\fBpcrepattern\fP
.\"
documentation.
.
.
.SH "CODE VALUE OF NEWLINE"
.rs
.sp
By default, PCRE interprets the linefeed (LF) character as indicating the end
of a line. This is the normal newline character on Unix-like systems. You can
compile PCRE to use carriage return (CR) instead, by adding
.sp
  --enable-newline-is-cr
.sp
to the \fBconfigure\fP command. There is also a --enable-newline-is-lf option,
which explicitly specifies linefeed as the newline character.
.sp
Alternatively, you can specify that line endings are to be indicated by the two
character sequence CRLF. If you want this, add
.sp
  --enable-newline-is-crlf
.sp
to the \fBconfigure\fP command. There is a fourth option, specified by
.sp
  --enable-newline-is-anycrlf
.sp
which causes PCRE to recognize any of the three sequences CR, LF, or CRLF as
indicating a line ending. Finally, a fifth option, specified by
.sp
  --enable-newline-is-any
.sp
causes PCRE to recognize any Unicode newline sequence.
.P
Whatever line ending convention is selected when PCRE is built can be
overridden when the library functions are called. At build time it is
conventional to use the standard for your operating system.
.
.
.SH "WHAT \eR MATCHES"
.rs
.sp
By default, the sequence \eR in a pattern matches any Unicode newline sequence,
whatever has been selected as the line ending sequence. If you specify
.sp
  --enable-bsr-anycrlf
.sp
the default is changed so that \eR matches only CR, LF, or CRLF. Whatever is
selected when PCRE is built can be overridden when the library functions are
called.
.
.
.SH "BUILDING SHARED AND STATIC LIBRARIES"
.rs
.sp
The PCRE building process uses \fBlibtool\fP to build both shared and static
Unix libraries by default. You can suppress one of these by adding one of
.sp
  --disable-shared
  --disable-static
.sp
to the \fBconfigure\fP command, as required.
.
.
.SH "POSIX MALLOC USAGE"
.rs
.sp
When PCRE is called through the POSIX interface (see the
.\" HREF
\fBpcreposix\fP
.\"
documentation), additional working storage is required for holding the pointers
to capturing substrings, because PCRE requires three integers per substring,
whereas the POSIX interface provides only two. If the number of expected
substrings is small, the wrapper function uses space on the stack, because this
is faster than using \fBmalloc()\fP for each call. The default threshold above
which the stack is no longer used is 10; it can be changed by adding a setting
such as
.sp
  --with-posix-malloc-threshold=20
.sp
to the \fBconfigure\fP command.
.
.
.SH "HANDLING VERY LARGE PATTERNS"
.rs
.sp
Within a compiled pattern, offset values are used to point from one part to
another (for example, from an opening parenthesis to an alternation
metacharacter). By default, two-byte values are used for these offsets, leading
to a maximum size for a compiled pattern of around 64K. This is sufficient to
handle all but the most gigantic patterns. Nevertheless, some people do want to
process truly enormous patterns, so it is possible to compile PCRE to use
three-byte or four-byte offsets by adding a setting such as
.sp
  --with-link-size=3
.sp
to the \fBconfigure\fP command. The value given must be 2, 3, or 4. Using
longer offsets slows down the operation of PCRE because it has to load
additional bytes when handling them.
.
.
.SH "AVOIDING EXCESSIVE STACK USAGE"
.rs
.sp
When matching with the \fBpcre_exec()\fP function, PCRE implements backtracking
by making recursive calls to an internal function called \fBmatch()\fP. In
environments where the size of the stack is limited, this can severely limit
PCRE's operation. (The Unix environment does not usually suffer from this
problem, but it may sometimes be necessary to increase the maximum stack size.
There is a discussion in the
.\" HREF
\fBpcrestack\fP
.\"
documentation.) An alternative approach to recursion that uses memory from the
heap to remember data, instead of using recursive function calls, has been
implemented to work round the problem of limited stack size. If you want to
build a version of PCRE that works this way, add
.sp
  --disable-stack-for-recursion
.sp
to the \fBconfigure\fP command. With this configuration, PCRE will use the
\fBpcre_stack_malloc\fP and \fBpcre_stack_free\fP variables to call memory
management functions. By default these point to \fBmalloc()\fP and
\fBfree()\fP, but you can replace the pointers so that your own functions are
used instead.
.P
Separate functions are provided rather than using \fBpcre_malloc\fP and
\fBpcre_free\fP because the usage is very predictable: the block sizes
requested are always the same, and the blocks are always freed in reverse
order. A calling program might be able to implement optimized functions that
perform better than \fBmalloc()\fP and \fBfree()\fP. PCRE runs noticeably more
slowly when built in this way. This option affects only the \fBpcre_exec()\fP
function; it is not relevant for \fBpcre_dfa_exec()\fP.
.
.
.SH "LIMITING PCRE RESOURCE USAGE"
.rs
.sp
Internally, PCRE has a function called \fBmatch()\fP, which it calls repeatedly
(sometimes recursively) when matching a pattern with the \fBpcre_exec()\fP
function. By controlling the maximum number of times this function may be
called during a single matching operation, a limit can be placed on the
resources used by a single call to \fBpcre_exec()\fP. The limit can be changed
at run time, as described in the
.\" HREF
\fBpcreapi\fP
.\"
documentation. The default is 10 million, but this can be changed by adding a
setting such as
.sp
  --with-match-limit=500000
.sp
to the \fBconfigure\fP command. This setting has no effect on the
\fBpcre_dfa_exec()\fP matching function.
.P
In some environments it is desirable to limit the depth of recursive calls of
\fBmatch()\fP more strictly than the total number of calls, in order to
restrict the maximum amount of stack (or heap, if --disable-stack-for-recursion
is specified) that is used. A second limit controls this; it defaults to the
value that is set for --with-match-limit, which imposes no additional
constraints. However, you can set a lower limit by adding, for example,
.sp
  --with-match-limit-recursion=10000
.sp
to the \fBconfigure\fP command. This value can also be overridden at run time.
.
.
.SH "CREATING CHARACTER TABLES AT BUILD TIME"
.rs
.sp
PCRE uses fixed tables for processing characters whose code values are less
than 256. By default, PCRE is built with a set of tables that are distributed
in the file \fIpcre_chartables.c.dist\fP. These tables are for ASCII codes
only. If you add
.sp
  --enable-rebuild-chartables
.sp
to the \fBconfigure\fP command, the distributed tables are no longer used.
Instead, a program called \fBdftables\fP is compiled and run. This outputs the
source for a new set of tables, created in the default locale of your C runtime
system. (This method of replacing the tables does not work if you are cross
compiling, because \fBdftables\fP is run on the local host. If you need to
create alternative tables when cross compiling, you will have to do so "by
hand".)
.
.
.SH "USING EBCDIC CODE"
.rs
.sp
PCRE assumes by default that it will run in an environment where the character
code is ASCII (or Unicode, which is a superset of ASCII). This is the case for
most computer operating systems. PCRE can, however, be compiled to run in an
EBCDIC environment by adding
.sp
  --enable-ebcdic
.sp
to the \fBconfigure\fP command. This setting implies
--enable-rebuild-chartables. You should only use it if you know that you are in
an EBCDIC environment (for example, an IBM mainframe operating system). The
--enable-ebcdic option is incompatible with --enable-utf8.
.
.
.SH "PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT"
.rs
.sp
By default, \fBpcregrep\fP reads all files as plain text. You can build it so
that it recognizes files whose names end in \fB.gz\fP or \fB.bz2\fP, and reads
them with \fBlibz\fP or \fBlibbz2\fP, respectively, by adding one or both of
.sp
  --enable-pcregrep-libz
  --enable-pcregrep-libbz2
.sp
to the \fBconfigure\fP command. These options naturally require that the
relevant libraries are installed on your system. Configuration will fail if
they are not.
.
.
.SH "PCRETEST OPTION FOR LIBREADLINE SUPPORT"
.rs
.sp
If you add
.sp
  --enable-pcretest-libreadline
.sp
to the \fBconfigure\fP command, \fBpcretest\fP is linked with the
\fBlibreadline\fP library, and when its input is from a terminal, it reads it
using the \fBreadline()\fP function. This provides line-editing and history
facilities. Note that \fBlibreadline\fP is GPL-licensed, so if you distribute a
binary of \fBpcretest\fP linked in this way, there may be licensing issues.
.P
Setting this option causes the \fB-lreadline\fP option to be added to the
\fBpcretest\fP build. In many operating environments with a system-installed
\fBlibreadline\fP this is sufficient. However, in some environments (e.g.
if an unmodified distribution version of readline is in use), some extra
configuration may be necessary. The INSTALL file for \fBlibreadline\fP says
this:
.sp
  "Readline uses the termcap functions, but does not link with the
  termcap or curses library itself, allowing applications which link
  with readline the to choose an appropriate library."
.sp
If your environment has not been set up so that an appropriate library is
automatically included, you may need to add something like
.sp
  LIBS="-lncurses"
.sp
immediately before the \fBconfigure\fP command.
.
.
.SH "SEE ALSO"
.rs
.sp
\fBpcreapi\fP(3), \fBpcre_config\fP(3).
.
.
.SH AUTHOR
.rs
.sp
.nf
Philip Hazel
University Computing Service
Cambridge CB2 3QH, England.
.fi
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 29 September 2009
Copyright (c) 1997-2009 University of Cambridge.
.fi
usr/share/man/man3/pcre_get_substring_list.3000064400000002405150403561450015103 0ustar00.TH PCRE_GET_SUBSTRING_LIST 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B int pcre_get_substring_list(const char *\fIsubject\fP,
.ti +5n
.B int *\fIovector\fP, int \fIstringcount\fP, "const char ***\fIlistptr\fP);"
.
.SH DESCRIPTION
.rs
.sp
This is a convenience function for extracting a list of all the captured
substrings. The arguments are:
.sp
  \fIsubject\fP       Subject that has been successfully matched
  \fIovector\fP       Offset vector that \fBpcre_exec()\fP used
  \fIstringcount\fP   Value returned by \fBpcre_exec()\fP
  \fIlistptr\fP       Where to put a pointer to the list
.sp
The memory in which the substrings and the list are placed is obtained by
calling \fBpcre_malloc()\fP. The convenience function
\fBpcre_free_substring_list()\fP can be used to free it when it is no longer
needed. A pointer to a list of pointers is put in the variable whose address is
in \fIlistptr\fP. The list is terminated by a NULL pointer. The yield of the
function is zero on success or PCRE_ERROR_NOMEMORY if sufficient memory could
not be obtained.
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fP
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fP
.\"
page.
usr/share/man/man3/pcre.3000064400000030551150403561450011114 0ustar00.TH PCRE 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH INTRODUCTION
.rs
.sp
The PCRE library is a set of functions that implement regular expression
pattern matching using the same syntax and semantics as Perl, with just a few
differences. Some features that appeared in Python and PCRE before they
appeared in Perl are also available using the Python syntax, there is some
support for one or two .NET and Oniguruma syntax items, and there is an option
for requesting some minor changes that give better JavaScript compatibility.
.P
The current implementation of PCRE corresponds approximately with Perl 5.10,
including support for UTF-8 encoded strings and Unicode general category
properties. However, UTF-8 and Unicode support has to be explicitly enabled; it
is not the default. The Unicode tables correspond to Unicode release 5.2.0.
.P
In addition to the Perl-compatible matching function, PCRE contains an
alternative function that matches the same compiled patterns in a different
way. In certain circumstances, the alternative function has some advantages.
For a discussion of the two matching algorithms, see the
.\" HREF
\fBpcrematching\fP
.\"
page.
.P
PCRE is written in C and released as a C library. A number of people have
written wrappers and interfaces of various kinds. In particular, Google Inc.
have provided a comprehensive C++ wrapper. This is now included as part of the
PCRE distribution. The
.\" HREF
\fBpcrecpp\fP
.\"
page has details of this interface. Other people's contributions can be found
in the \fIContrib\fR directory at the primary FTP site, which is:
.sp
.\" HTML <a href="ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre">
.\" </a>
ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre
.P
Details of exactly which Perl regular expression features are and are not
supported by PCRE are given in separate documents. See the
.\" HREF
\fBpcrepattern\fR
.\"
and
.\" HREF
\fBpcrecompat\fR
.\"
pages. There is a syntax summary in the
.\" HREF
\fBpcresyntax\fR
.\"
page.
.P
Some features of PCRE can be included, excluded, or changed when the library is
built. The
.\" HREF
\fBpcre_config()\fR
.\"
function makes it possible for a client to discover which features are
available. The features themselves are described in the
.\" HREF
\fBpcrebuild\fP
.\"
page. Documentation about building PCRE for various operating systems can be
found in the \fBREADME\fP and \fBNON-UNIX-USE\fP files in the source
distribution.
.P
The library contains a number of undocumented internal functions and data
tables that are used by more than one of the exported external functions, but
which are not intended for use by external callers. Their names all begin with
"_pcre_", which hopefully will not provoke any name clashes. In some
environments, it is possible to control which external symbols are exported
when a shared library is built, and in these cases the undocumented symbols are
not exported.
.
.
.SH "USER DOCUMENTATION"
.rs
.sp
The user documentation for PCRE comprises a number of different sections. In
the "man" format, each of these is a separate "man page". In the HTML format,
each is a separate page, linked from the index page. In the plain text format,
all the sections, except the \fBpcredemo\fP section, are concatenated, for ease
of searching. The sections are as follows:
.sp
  pcre              this document
  pcre-config       show PCRE installation configuration information
  pcreapi           details of PCRE's native C API
  pcrebuild         options for building PCRE
  pcrecallout       details of the callout feature
  pcrecompat        discussion of Perl compatibility
  pcrecpp           details of the C++ wrapper
  pcredemo          a demonstration C program that uses PCRE
  pcregrep          description of the \fBpcregrep\fP command
  pcrematching      discussion of the two matching algorithms
  pcrepartial       details of the partial matching facility
.\" JOIN
  pcrepattern       syntax and semantics of supported
                      regular expressions
  pcreperform       discussion of performance issues
  pcreposix         the POSIX-compatible C API
  pcreprecompile    details of saving and re-using precompiled patterns
  pcresample        discussion of the pcredemo program
  pcrestack         discussion of stack usage
  pcresyntax        quick syntax reference
  pcretest          description of the \fBpcretest\fP testing command
.sp
In addition, in the "man" and HTML formats, there is a short page for each
C library function, listing its arguments and results.
.
.
.SH LIMITATIONS
.rs
.sp
There are some size limitations in PCRE but it is hoped that they will never in
practice be relevant.
.P
The maximum length of a compiled pattern is 65539 (sic) bytes if PCRE is
compiled with the default internal linkage size of 2. If you want to process
regular expressions that are truly enormous, you can compile PCRE with an
internal linkage size of 3 or 4 (see the \fBREADME\fP file in the source
distribution and the
.\" HREF
\fBpcrebuild\fP
.\"
documentation for details). In these cases the limit is substantially larger.
However, the speed of execution is slower.
.P
All values in repeating quantifiers must be less than 65536.
.P
There is no limit to the number of parenthesized subpatterns, but there can be
no more than 65535 capturing subpatterns.
.P
The maximum length of name for a named subpattern is 32 characters, and the
maximum number of named subpatterns is 10000.
.P
The maximum length of a subject string is the largest positive number that an
integer variable can hold. However, when using the traditional matching
function, PCRE uses recursion to handle subpatterns and indefinite repetition.
This means that the available stack space may limit the size of a subject
string that can be processed by certain patterns. For a discussion of stack
issues, see the
.\" HREF
\fBpcrestack\fP
.\"
documentation.
.
.
.\" HTML <a name="utf8support"></a>
.
.SH "UTF-8 AND UNICODE PROPERTY SUPPORT"
.rs
.sp
From release 3.3, PCRE has had some support for character strings encoded in
the UTF-8 format. For release 4.0 this was greatly extended to cover most
common requirements, and in release 5.0 additional support for Unicode general
category properties was added.
.P
In order to process UTF-8 strings, you must build PCRE to include UTF-8 support
in the code, and, in addition, you must call
.\" HREF
\fBpcre_compile()\fP
.\"
with the PCRE_UTF8 option flag, or the pattern must start with the sequence
(*UTF8). When either of these is the case, both the pattern and any subject
strings that are matched against it are treated as UTF-8 strings instead of
strings of 1-byte characters.
.P
If you compile PCRE with UTF-8 support, but do not use it at run time, the
library will be a bit bigger, but the additional run time overhead is limited
to testing the PCRE_UTF8 flag occasionally, so should not be very big.
.P
If PCRE is built with Unicode character property support (which implies UTF-8
support), the escape sequences \ep{..}, \eP{..}, and \eX are supported.
The available properties that can be tested are limited to the general
category properties such as Lu for an upper case letter or Nd for a decimal
number, the Unicode script names such as Arabic or Han, and the derived
properties Any and L&. A full list is given in the
.\" HREF
\fBpcrepattern\fP
.\"
documentation. Only the short names for properties are supported. For example,
\ep{L} matches a letter. Its Perl synonym, \ep{Letter}, is not supported.
Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
compatibility with Perl 5.6. PCRE does not support this.
.
.
.\" HTML <a name="utf8strings"></a>
.
.SS "Validity of UTF-8 strings"
.rs
.sp
When you set the PCRE_UTF8 flag, the strings passed as patterns and subjects
are (by default) checked for validity on entry to the relevant functions. From
release 7.3 of PCRE, the check is according to the rules of RFC 3629, which are
themselves derived from the Unicode specification. Earlier releases of PCRE
followed the rules of RFC 2279, which allows the full range of 31-bit values (0
to 0x7FFFFFFF). The current check allows only values in the range U+0 to
U+10FFFF, excluding U+D800 to U+DFFF.
.P
The excluded code points are the "Low Surrogate Area" of Unicode, of which the
Unicode Standard says this: "The Low Surrogate Area does not contain any
character assignments, consequently no character code charts or namelists are
provided for this area. Surrogates are reserved for use with UTF-16 and then
must be used in pairs." The code points that are encoded by UTF-16 pairs are
available as independent code points in the UTF-8 encoding. (In other words,
the whole surrogate thing is a fudge for UTF-16 which unfortunately messes up
UTF-8.)
.P
If an invalid UTF-8 string is passed to PCRE, an error return
(PCRE_ERROR_BADUTF8) is given. In some situations, you may already know that
your strings are valid, and therefore want to skip these checks in order to
improve performance. If you set the PCRE_NO_UTF8_CHECK flag at compile time or
at run time, PCRE assumes that the pattern or subject it is given
(respectively) contains only valid UTF-8 codes. In this case, it does not
diagnose an invalid UTF-8 string.
.P
If you pass an invalid UTF-8 string when PCRE_NO_UTF8_CHECK is set, what
happens depends on why the string is invalid. If the string conforms to the
"old" definition of UTF-8 (RFC 2279), it is processed as a string of characters
in the range 0 to 0x7FFFFFFF. In other words, apart from the initial validity
test, PCRE (when in UTF-8 mode) handles strings according to the more liberal
rules of RFC 2279. However, if the string does not even conform to RFC 2279,
the result is undefined. Your program may crash.
.P
If you want to process strings of values in the full range 0 to 0x7FFFFFFF,
encoded in a UTF-8-like manner as per the old RFC, you can set
PCRE_NO_UTF8_CHECK to bypass the more restrictive test. However, in this
situation, you will have to apply your own validity check.
.
.SS "General comments about UTF-8 mode"
.rs
.sp
1. An unbraced hexadecimal escape sequence (such as \exb3) matches a two-byte
UTF-8 character if the value is greater than 127.
.P
2. Octal numbers up to \e777 are recognized, and match two-byte UTF-8
characters for values greater than \e177.
.P
3. Repeat quantifiers apply to complete UTF-8 characters, not to individual
bytes, for example: \ex{100}{3}.
.P
4. The dot metacharacter matches one UTF-8 character instead of a single byte.
.P
5. The escape sequence \eC can be used to match a single byte in UTF-8 mode,
but its use can lead to some strange effects. This facility is not available in
the alternative matching function, \fBpcre_dfa_exec()\fP.
.P
6. The character escapes \eb, \eB, \ed, \eD, \es, \eS, \ew, and \eW correctly
test characters of any code value, but the characters that PCRE recognizes as
digits, spaces, or word characters remain the same set as before, all with
values less than 256. This remains true even when PCRE includes Unicode
property support, because to do otherwise would slow down PCRE in many common
cases. If you really want to test for a wider sense of, say, "digit", you
must use Unicode property tests such as \ep{Nd}. Note that this also applies to
\eb, because it is defined in terms of \ew and \eW.
.P
7. Similarly, characters that match the POSIX named character classes are all
low-valued characters.
.P
8. However, the Perl 5.10 horizontal and vertical whitespace matching escapes
(\eh, \eH, \ev, and \eV) do match all the appropriate Unicode characters.
.P
9. Case-insensitive matching applies only to characters whose values are less
than 128, unless PCRE is built with Unicode property support. Even when Unicode
property support is available, PCRE still uses its own character tables when
checking the case of low-valued characters, so as not to degrade performance.
The Unicode property information is used only for characters with higher
values. Even when Unicode property support is available, PCRE supports
case-insensitive matching only when there is a one-to-one mapping between a
letter's cases. There are a small number of many-to-one mappings in Unicode;
these are not supported by PCRE.
.
.
.SH AUTHOR
.rs
.sp
.nf
Philip Hazel
University Computing Service
Cambridge CB2 3QH, England.
.fi
.P
Putting an actual email address here seems to have been a spam magnet, so I've
taken it away. If you want to email me, use my two initials, followed by the
two digits 10, at the domain cam.ac.uk.
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 01 March 2010
Copyright (c) 1997-2010 University of Cambridge.
.fi
usr/share/man/man3/pcreperform.3000064400000015050150403561450012504 0ustar00.TH PCREPERFORM 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PCRE PERFORMANCE"
.rs
.sp
Two aspects of performance are discussed below: memory usage and processing
time. The way you express your pattern as a regular expression can affect both
of them.
.
.SH "COMPILED PATTERN MEMORY USAGE"
.rs
.sp
Patterns are compiled by PCRE into a reasonably efficient byte code, so that
most simple patterns do not use much memory. However, there is one case where
the memory usage of a compiled pattern can be unexpectedly large. If a
parenthesized subpattern has a quantifier with a minimum greater than 1 and/or
a limited maximum, the whole subpattern is repeated in the compiled code. For
example, the pattern
.sp
  (abc|def){2,4}
.sp
is compiled as if it were
.sp
  (abc|def)(abc|def)((abc|def)(abc|def)?)?
.sp
(Technical aside: It is done this way so that backtrack points within each of
the repetitions can be independently maintained.)
.P
For regular expressions whose quantifiers use only small numbers, this is not
usually a problem. However, if the numbers are large, and particularly if such
repetitions are nested, the memory usage can become an embarrassment. For
example, the very simple pattern
.sp
  ((ab){1,1000}c){1,3}
.sp
uses 51K bytes when compiled. When PCRE is compiled with its default internal
pointer size of two bytes, the size limit on a compiled pattern is 64K, and
this is reached with the above pattern if the outer repetition is increased
from 3 to 4. PCRE can be compiled to use larger internal pointers and thus
handle larger compiled patterns, but it is better to try to rewrite your
pattern to use less memory if you can.
.P
One way of reducing the memory usage for such patterns is to make use of PCRE's
.\" HTML <a href="pcrepattern.html#subpatternsassubroutines">
.\" </a>
"subroutine"
.\"
facility. Re-writing the above pattern as
.sp
  ((ab)(?2){0,999}c)(?1){0,2}
.sp
reduces the memory requirements to 18K, and indeed it remains under 20K even
with the outer repetition increased to 100. However, this pattern is not
exactly equivalent, because the "subroutine" calls are treated as
.\" HTML <a href="pcrepattern.html#atomicgroup">
.\" </a>
atomic groups
.\"
into which there can be no backtracking if there is a subsequent matching
failure. Therefore, PCRE cannot do this kind of rewriting automatically.
Furthermore, there is a noticeable loss of speed when executing the modified
pattern. Nevertheless, if the atomic grouping is not a problem and the loss of
speed is acceptable, this kind of rewriting will allow you to process patterns
that PCRE cannot otherwise handle.
.
.
.SH "STACK USAGE AT RUN TIME"
.rs
.sp
When \fBpcre_exec()\fP is used for matching, certain kinds of pattern can cause
it to use large amounts of the process stack. In some environments the default
process stack is quite small, and if it runs out the result is often SIGSEGV.
This issue is probably the most frequently raised problem with PCRE. Rewriting
your pattern can often help. The
.\" HREF
\fBpcrestack\fP
.\"
documentation discusses this issue in detail.
.
.
.SH "PROCESSING TIME"
.rs
.sp
Certain items in regular expression patterns are processed more efficiently
than others. It is more efficient to use a character class like [aeiou] than a
set of single-character alternatives such as (a|e|i|o|u). In general, the
simplest construction that provides the required behaviour is usually the most
efficient. Jeffrey Friedl's book contains a lot of useful general discussion
about optimizing regular expressions for efficient performance. This document
contains a few observations about PCRE.
.P
Using Unicode character properties (the \ep, \eP, and \eX escapes) is slow,
because PCRE has to scan a structure that contains data for over fifteen
thousand characters whenever it needs a character's property. If you can find
an alternative pattern that does not use character properties, it will probably
be faster.
.P
When a pattern begins with .* not in parentheses, or in parentheses that are
not the subject of a backreference, and the PCRE_DOTALL option is set, the
pattern is implicitly anchored by PCRE, since it can match only at the start of
a subject string. However, if PCRE_DOTALL is not set, PCRE cannot make this
optimization, because the . metacharacter does not then match a newline, and if
the subject string contains newlines, the pattern may match from the character
immediately following one of them instead of from the very start. For example,
the pattern
.sp
  .*second
.sp
matches the subject "first\enand second" (where \en stands for a newline
character), with the match starting at the seventh character. In order to do
this, PCRE has to retry the match starting after every newline in the subject.
.P
If you are using such a pattern with subject strings that do not contain
newlines, the best performance is obtained by setting PCRE_DOTALL, or starting
the pattern with ^.* or ^.*? to indicate explicit anchoring. That saves PCRE
from having to scan along the subject looking for a newline to restart at.
.P
Beware of patterns that contain nested indefinite repeats. These can take a
long time to run when applied to a string that does not match. Consider the
pattern fragment
.sp
  ^(a+)*
.sp
This can match "aaaa" in 16 different ways, and this number increases very
rapidly as the string gets longer. (The * repeat can match 0, 1, 2, 3, or 4
times, and for each of those cases other than 0 or 4, the + repeats can match
different numbers of times.) When the remainder of the pattern is such that the
entire match is going to fail, PCRE has in principle to try every possible
variation, and this can take an extremely long time, even for relatively short
strings.
.P
An optimization catches some of the more simple cases such as
.sp
  (a+)*b
.sp
where a literal character follows. Before embarking on the standard matching
procedure, PCRE checks that there is a "b" later in the subject string, and if
there is not, it fails the match immediately. However, when there is no
following literal this optimization cannot be used. You can see the difference
by comparing the behaviour of
.sp
  (a+)*\ed
.sp
with the pattern above. The pattern above (with the literal "b") gives a
failure almost instantly when applied to a whole line of "a" characters,
whereas the pattern ending in \ed takes an appreciable time with strings longer
than about 20 characters.
.P
In many cases, the solution to this kind of performance issue is to use an
atomic group or a possessive quantifier.
.
.
.SH AUTHOR
.rs
.sp
.nf
Philip Hazel
University Computing Service
Cambridge CB2 3QH, England.
.fi
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 07 March 2010
Copyright (c) 1997-2010 University of Cambridge.
.fi
usr/share/man/man3/pcresample.3000064400000005323150403561450012315 0ustar00.TH PCRESAMPLE 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PCRE SAMPLE PROGRAM"
.rs
.sp
A simple, complete demonstration program, to get you started with using PCRE,
is supplied in the file \fIpcredemo.c\fP in the PCRE distribution. A listing of
this program is given in the
.\" HREF
\fBpcredemo\fP
.\"
documentation. If you do not have a copy of the PCRE distribution, you can save
this listing to re-create \fIpcredemo.c\fP.
.P
The program compiles the regular expression that is its first argument, and
matches it against the subject string in its second argument. No PCRE options
are set, and default character tables are used. If matching succeeds, the
program outputs the portion of the subject that matched, together with the
contents of any captured substrings.
.P
If the -g option is given on the command line, the program then goes on to
check for further matches of the same regular expression in the same subject
string. The logic is a little bit tricky because of the possibility of matching
an empty string. Comments in the code explain what is going on.
.P
If PCRE is installed in the standard include and library directories for your
operating system, you should be able to compile the demonstration program using
this command:
.sp
  gcc -o pcredemo pcredemo.c -lpcre
.sp
If PCRE is installed elsewhere, you may need to add additional options to the
command line. For example, on a Unix-like system that has PCRE installed in
\fI/usr/local\fP, you can compile the demonstration program using a command
like this:
.sp
.\" JOINSH
  gcc -o pcredemo -I/usr/local/include pcredemo.c \e
      -L/usr/local/lib -lpcre
.sp
Once you have compiled the demonstration program, you can run simple tests like
this:
.sp
  ./pcredemo 'cat|dog' 'the cat sat on the mat'
  ./pcredemo -g 'cat|dog' 'the dog sat on the cat'
.sp
Note that there is a much more comprehensive test program, called
.\" HREF
\fBpcretest\fP,
.\"
which supports many more facilities for testing regular expressions and the
PCRE library. The
.\" HREF
\fBpcredemo\fP
.\"
program is provided as a simple coding example.
.P
When you try to run
.\" HREF
\fBpcredemo\fP
.\"
when PCRE is not installed in the standard library directory, you may get an
error like this on some operating systems (e.g. Solaris):
.sp
  ld.so.1: a.out: fatal: libpcre.so.0: open failed: No such file or directory
.sp
This is caused by the way shared library support works on those systems. You
need to add
.sp
  -R/usr/local/lib
.sp
(for example) to the compile command to get round this problem.
.
.
.SH AUTHOR
.rs
.sp
.nf
Philip Hazel
University Computing Service
Cambridge CB2 3QH, England.
.fi
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 30 September 2009
Copyright (c) 1997-2009 University of Cambridge.
.fi
usr/share/man/man3/pcre_copy_substring.3000064400000002122150403561450014237 0ustar00.TH PCRE_COPY_SUBSTRING 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B int pcre_copy_substring(const char *\fIsubject\fP, int *\fIovector\fP,
.ti +5n
.B int \fIstringcount\fP, int \fIstringnumber\fP, char *\fIbuffer\fP,
.ti +5n
.B int \fIbuffersize\fP);
.
.SH DESCRIPTION
.rs
.sp
This is a convenience function for extracting a captured substring into a given
buffer. The arguments are:
.sp
  \fIsubject\fP       Subject that has been successfully matched
  \fIovector\fP       Offset vector that \fBpcre_exec()\fP used
  \fIstringcount\fP   Value returned by \fBpcre_exec()\fP
  \fIstringnumber\fP  Number of the required substring
  \fIbuffer\fP        Buffer to receive the string
  \fIbuffersize\fP    Size of buffer
.sp
The yield is the length of the string, PCRE_ERROR_NOMEMORY if the buffer was
too small, or PCRE_ERROR_NOSUBSTRING if the string number is invalid.
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fP
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fP
.\"
page.
usr/share/man/man3/pcre_fullinfo.3000064400000004275150403561450013016 0ustar00.TH PCRE_FULLINFO 3
.SH NAME
PCRE - Perl-compatible regular expressions
.SH SYNOPSIS
.rs
.sp
.B #include <pcre.h>
.PP
.SM
.B int pcre_fullinfo(const pcre *\fIcode\fP, "const pcre_extra *\fIextra\fP,"
.ti +5n
.B int \fIwhat\fP, void *\fIwhere\fP);
.
.SH DESCRIPTION
.rs
.sp
This function returns information about a compiled pattern. Its arguments are:
.sp
  \fIcode\fP                      Compiled regular expression
  \fIextra\fP                     Result of \fBpcre_study()\fP or NULL
  \fIwhat\fP                      What information is required
  \fIwhere\fP                     Where to put the information
.sp
The following information is available:
.sp
  PCRE_INFO_BACKREFMAX      Number of highest back reference
  PCRE_INFO_CAPTURECOUNT    Number of capturing subpatterns
  PCRE_INFO_DEFAULT_TABLES  Pointer to default tables
  PCRE_INFO_FIRSTBYTE       Fixed first byte for a match, or
                              -1 for start of string
                                 or after newline, or
                              -2 otherwise
  PCRE_INFO_FIRSTTABLE      Table of first bytes (after studying)
  PCRE_INFO_JCHANGED        Return 1 if (?J) or (?-J) was used
  PCRE_INFO_LASTLITERAL     Literal last byte required
  PCRE_INFO_MINLENGTH       Lower bound length of matching strings
  PCRE_INFO_NAMECOUNT       Number of named subpatterns
  PCRE_INFO_NAMEENTRYSIZE   Size of name table entry
  PCRE_INFO_NAMETABLE       Pointer to name table
  PCRE_INFO_OKPARTIAL       Return 1 if partial matching can be tried
                              (always returns 1 after release 8.00)
  PCRE_INFO_OPTIONS         Option bits used for compilation
  PCRE_INFO_SIZE            Size of compiled pattern
  PCRE_INFO_STUDYSIZE       Size of study data
.sp
The yield of the function is zero on success or:
.sp
  PCRE_ERROR_NULL           the argument \fIcode\fP was NULL
                            the argument \fIwhere\fP was NULL
  PCRE_ERROR_BADMAGIC       the "magic number" was not found
  PCRE_ERROR_BADOPTION      the value of \fIwhat\fP was invalid
.P
There is a complete description of the PCRE native API in the
.\" HREF
\fBpcreapi\fP
.\"
page and a description of the POSIX API in the
.\" HREF
\fBpcreposix\fP
.\"
page.
usr/share/man/man1/pcregrep.1000064400000057476150403561450012005 0ustar00.TH PCREGREP 1
.SH NAME
pcregrep - a grep with Perl-compatible regular expressions.
.SH SYNOPSIS
.B pcregrep [options] [long options] [pattern] [path1 path2 ...]
.
.SH DESCRIPTION
.rs
.sp
\fBpcregrep\fP searches files for character patterns, in the same way as other
grep commands do, but it uses the PCRE regular expression library to support
patterns that are compatible with the regular expressions of Perl 5. See
.\" HREF
\fBpcrepattern\fP(3)
.\"
for a full description of syntax and semantics of the regular expressions
that PCRE supports.
.P
Patterns, whether supplied on the command line or in a separate file, are given
without delimiters. For example:
.sp
  pcregrep Thursday /etc/motd
.sp
If you attempt to use delimiters (for example, by surrounding a pattern with
slashes, as is common in Perl scripts), they are interpreted as part of the
pattern. Quotes can of course be used to delimit patterns on the command line
because they are interpreted by the shell, and indeed they are required if a
pattern contains white space or shell metacharacters.
.P
The first argument that follows any option settings is treated as the single
pattern to be matched when neither \fB-e\fP nor \fB-f\fP is present.
Conversely, when one or both of these options are used to specify patterns, all
arguments are treated as path names. At least one of \fB-e\fP, \fB-f\fP, or an
argument pattern must be provided.
.P
If no files are specified, \fBpcregrep\fP reads the standard input. The
standard input can also be referenced by a name consisting of a single hyphen.
For example:
.sp
  pcregrep some-pattern /file1 - /file3
.sp
By default, each line that matches a pattern is copied to the standard
output, and if there is more than one file, the file name is output at the
start of each line, followed by a colon. However, there are options that can
change how \fBpcregrep\fP behaves. In particular, the \fB-M\fP option makes it
possible to search for patterns that span line boundaries. What defines a line
boundary is controlled by the \fB-N\fP (\fB--newline\fP) option.
.P
Patterns are limited to 8K or BUFSIZ characters, whichever is the greater.
BUFSIZ is defined in \fB<stdio.h>\fP. When there is more than one pattern
(specified by the use of \fB-e\fP and/or \fB-f\fP), each pattern is applied to
each line in the order in which they are defined, except that all the \fB-e\fP
patterns are tried before the \fB-f\fP patterns.
.P
By default, as soon as one pattern matches (or fails to match when \fB-v\fP is
used), no further patterns are considered. However, if \fB--colour\fP (or
\fB--color\fP) is used to colour the matching substrings, or if
\fB--only-matching\fP, \fB--file-offsets\fP, or \fB--line-offsets\fP is used to
output only the part of the line that matched (either shown literally, or as an
offset), scanning resumes immediately following the match, so that further
matches on the same line can be found. If there are multiple patterns, they are
all tried on the remainder of the line, but patterns that follow the one that
matched are not tried on the earlier part of the line.
.P
This is the same behaviour as GNU grep, but it does mean that the order in
which multiple patterns are specified can affect the output when one of the
above options is used.
.P
Patterns that can match an empty string are accepted, but empty string
matches are never recognized. An example is the pattern "(super)?(man)?", in
which all components are optional. This pattern finds all occurrences of both
"super" and "man"; the output differs from matching with "super|man" when only
the matching substrings are being shown.
.P
If the \fBLC_ALL\fP or \fBLC_CTYPE\fP environment variable is set,
\fBpcregrep\fP uses the value to set a locale when calling the PCRE library.
The \fB--locale\fP option can be used to override this.
.
.SH "SUPPORT FOR COMPRESSED FILES"
.rs
.sp
It is possible to compile \fBpcregrep\fP so that it uses \fBlibz\fP or
\fBlibbz2\fP to read files whose names end in \fB.gz\fP or \fB.bz2\fP,
respectively. You can find out whether your binary has support for one or both
of these file types by running it with the \fB--help\fP option. If the
appropriate support is not present, files are treated as plain text. The
standard input is always so treated.
.
.SH OPTIONS
.rs
.sp
The order in which some of the options appear can affect the output. For
example, both the \fB-h\fP and \fB-l\fP options affect the printing of file
names. Whichever comes later in the command line will be the one that takes
effect.
.TP 10
\fB--\fP
This terminates the list of options. It is useful if the next item on the
command line starts with a hyphen but is not an option. This allows for the
processing of patterns and filenames that start with hyphens.
.TP
\fB-A\fP \fInumber\fP, \fB--after-context=\fP\fInumber\fP
Output \fInumber\fP lines of context after each matching line. If filenames
and/or line numbers are being output, a hyphen separator is used instead of a
colon for the context lines. A line containing "--" is output between each
group of lines, unless they are in fact contiguous in the input file. The value
of \fInumber\fP is expected to be relatively small. However, \fBpcregrep\fP
guarantees to have up to 8K of following text available for context output.
.TP
\fB-B\fP \fInumber\fP, \fB--before-context=\fP\fInumber\fP
Output \fInumber\fP lines of context before each matching line. If filenames
and/or line numbers are being output, a hyphen separator is used instead of a
colon for the context lines. A line containing "--" is output between each
group of lines, unless they are in fact contiguous in the input file. The value
of \fInumber\fP is expected to be relatively small. However, \fBpcregrep\fP
guarantees to have up to 8K of preceding text available for context output.
.TP
\fB-C\fP \fInumber\fP, \fB--context=\fP\fInumber\fP
Output \fInumber\fP lines of context both before and after each matching line.
This is equivalent to setting both \fB-A\fP and \fB-B\fP to the same value.
.TP
\fB-c\fP, \fB--count\fP
Do not output individual lines from the files that are being scanned; instead
output the number of lines that would otherwise have been shown. If no lines
are selected, the number zero is output. If several files are being
scanned, a count is output for each of them. However, if the
\fB--files-with-matches\fP option is also used, only those files whose counts
are greater than zero are listed. When \fB-c\fP is used, the \fB-A\fP,
\fB-B\fP, and \fB-C\fP options are ignored.
.TP
\fB--colour\fP, \fB--color\fP
If this option is given without any data, it is equivalent to "--colour=auto".
If data is required, it must be given in the same shell item, separated by an
equals sign.
.TP
\fB--colour=\fP\fIvalue\fP, \fB--color=\fP\fIvalue\fP
This option specifies under what circumstances the parts of a line that matched
a pattern should be coloured in the output. By default, the output is not
coloured. The value (which is optional, see above) may be "never", "always", or
"auto". In the latter case, colouring happens only if the standard output is
connected to a terminal. More resources are used when colouring is enabled,
because \fBpcregrep\fP has to search for all possible matches in a line, not
just one, in order to colour them all.

The colour that is used can be specified by setting the environment variable
PCREGREP_COLOUR or PCREGREP_COLOR. The value of this variable should be a
string of two numbers, separated by a semicolon. They are copied directly into
the control string for setting colour on a terminal, so it is your
responsibility to ensure that they make sense. If neither of the environment
variables is set, the default is "1;31", which gives red.
.TP
\fB-D\fP \fIaction\fP, \fB--devices=\fP\fIaction\fP
If an input path is not a regular file or a directory, "action" specifies how
it is to be processed. Valid values are "read" (the default) or "skip"
(silently skip the path).
.TP
\fB-d\fP \fIaction\fP, \fB--directories=\fP\fIaction\fP
If an input path is a directory, "action" specifies how it is to be processed.
Valid values are "read" (the default), "recurse" (equivalent to the \fB-r\fP
option), or "skip" (silently skip the path). In the default case, directories
are read as if they were ordinary files. In some operating systems the effect
of reading a directory like this is an immediate end-of-file.
.TP
\fB-e\fP \fIpattern\fP, \fB--regex=\fP\fIpattern\fP, \fB--regexp=\fP\fIpattern\fP
Specify a pattern to be matched. This option can be used multiple times in
order to specify several patterns. It can also be used as a way of specifying a
single pattern that starts with a hyphen. When \fB-e\fP is used, no argument
pattern is taken from the command line; all arguments are treated as file
names. There is an overall maximum of 100 patterns. They are applied to each
line in the order in which they are defined until one matches (or fails to
match if \fB-v\fP is used). If \fB-f\fP is used with \fB-e\fP, the command line
patterns are matched first, followed by the patterns from the file, independent
of the order in which these options are specified. Note that multiple use of
\fB-e\fP is not the same as a single pattern with alternatives. For example,
X|Y finds the first character in a line that is X or Y, whereas if the two
patterns are given separately, \fBpcregrep\fP finds X if it is present, even if
it follows Y in the line. It finds Y only if there is no X in the line. This
really matters only if you are using \fB-o\fP to show the part(s) of the line
that matched.
.TP
\fB--exclude\fP=\fIpattern\fP
When \fBpcregrep\fP is searching the files in a directory as a consequence of
the \fB-r\fP (recursive search) option, any regular files whose names match the
pattern are excluded. Subdirectories are not excluded by this option; they are
searched recursively, subject to the \fB--exclude_dir\fP and
\fB--include_dir\fP options. The pattern is a PCRE regular expression, and is
matched against the final component of the file name (not the entire path). If
a file name matches both \fB--include\fP and \fB--exclude\fP, it is excluded.
There is no short form for this option.
.TP
\fB--exclude_dir\fP=\fIpattern\fP
When \fBpcregrep\fP is searching the contents of a directory as a consequence
of the \fB-r\fP (recursive search) option, any subdirectories whose names match
the pattern are excluded. (Note that the \fB--exclude\fP option does not affect
subdirectories.) The pattern is a PCRE regular expression, and is matched
against the final component of the name (not the entire path). If a
subdirectory name matches both \fB--include_dir\fP and \fB--exclude_dir\fP, it
is excluded. There is no short form for this option.
.TP
\fB-F\fP, \fB--fixed-strings\fP
Interpret each pattern as a list of fixed strings, separated by newlines,
instead of as a regular expression. The \fB-w\fP (match as a word) and \fB-x\fP
(match whole line) options can be used with \fB-F\fP. They apply to each of the
fixed strings. A line is selected if any of the fixed strings are found in it
(subject to \fB-w\fP or \fB-x\fP, if present).
.TP
\fB-f\fP \fIfilename\fP, \fB--file=\fP\fIfilename\fP
Read a number of patterns from the file, one per line, and match them against
each line of input. A data line is output if any of the patterns match it. The
filename can be given as "-" to refer to the standard input. When \fB-f\fP is
used, patterns specified on the command line using \fB-e\fP may also be
present; they are tested before the file's patterns. However, no other pattern
is taken from the command line; all arguments are treated as file names. There
is an overall maximum of 100 patterns. Trailing white space is removed from
each line, and blank lines are ignored. An empty file contains no patterns and
therefore matches nothing. See also the comments about multiple patterns versus
a single pattern with alternatives in the description of \fB-e\fP above.
.TP
\fB--file-offsets\fP
Instead of showing lines or parts of lines that match, show each match as an
offset from the start of the file and a length, separated by a comma. In this
mode, no context is shown. That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP
options are ignored. If there is more than one match in a line, each of them is
shown separately. This option is mutually exclusive with \fB--line-offsets\fP
and \fB--only-matching\fP.
.TP
\fB-H\fP, \fB--with-filename\fP
Force the inclusion of the filename at the start of output lines when searching
a single file. By default, the filename is not shown in this case. For matching
lines, the filename is followed by a colon; for context lines, a hyphen
separator is used. If a line number is also being output, it follows the file
name.
.TP
\fB-h\fP, \fB--no-filename\fP
Suppress the output filenames when searching multiple files. By default,
filenames are shown when multiple files are searched. For matching lines, the
filename is followed by a colon; for context lines, a hyphen separator is used.
If a line number is also being output, it follows the file name.
.TP
\fB--help\fP
Output a help message, giving brief details of the command options and file
type support, and then exit.
.TP
\fB-i\fP, \fB--ignore-case\fP
Ignore upper/lower case distinctions during comparisons.
.TP
\fB--include\fP=\fIpattern\fP
When \fBpcregrep\fP is searching the files in a directory as a consequence of
the \fB-r\fP (recursive search) option, only those regular files whose names
match the pattern are included. Subdirectories are always included and searched
recursively, subject to the \fB--include_dir\fP and \fB--exclude_dir\fP
options. The pattern is a PCRE regular expression, and is matched against the
final component of the file name (not the entire path). If a file name matches
both \fB--include\fP and \fB--exclude\fP, it is excluded. There is no short
form for this option.
.TP
\fB--include_dir\fP=\fIpattern\fP
When \fBpcregrep\fP is searching the contents of a directory as a consequence
of the \fB-r\fP (recursive search) option, only those subdirectories whose
names match the pattern are included. (Note that the \fB--include\fP option
does not affect subdirectories.) The pattern is a PCRE regular expression, and
is matched against the final component of the name (not the entire path). If a
subdirectory name matches both \fB--include_dir\fP and \fB--exclude_dir\fP, it
is excluded. There is no short form for this option.
.TP
\fB-L\fP, \fB--files-without-match\fP
Instead of outputting lines from the files, just output the names of the files
that do not contain any lines that would have been output. Each file name is
output once, on a separate line.
.TP
\fB-l\fP, \fB--files-with-matches\fP
Instead of outputting lines from the files, just output the names of the files
containing lines that would have been output. Each file name is output
once, on a separate line. Searching normally stops as soon as a matching line
is found in a file. However, if the \fB-c\fP (count) option is also used,
matching continues in order to obtain the correct count, and those files that
have at least one match are listed along with their counts. Using this option
with \fB-c\fP is a way of suppressing the listing of files with no matches.
.TP
\fB--label\fP=\fIname\fP
This option supplies a name to be used for the standard input when file names
are being output. If not supplied, "(standard input)" is used. There is no
short form for this option.
.TP
\fB--line-offsets\fP
Instead of showing lines or parts of lines that match, show each match as a
line number, the offset from the start of the line, and a length. The line
number is terminated by a colon (as usual; see the \fB-n\fP option), and the
offset and length are separated by a comma. In this mode, no context is shown.
That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP options are ignored. If there is
more than one match in a line, each of them is shown separately. This option is
mutually exclusive with \fB--file-offsets\fP and \fB--only-matching\fP.
.TP
\fB--locale\fP=\fIlocale-name\fP
This option specifies a locale to be used for pattern matching. It overrides
the value in the \fBLC_ALL\fP or \fBLC_CTYPE\fP environment variables. If no
locale is specified, the PCRE library's default (usually the "C" locale) is
used. There is no short form for this option.
.TP
\fB-M\fP, \fB--multiline\fP
Allow patterns to match more than one line. When this option is given, patterns
may usefully contain literal newline characters and internal occurrences of ^
and $ characters. The output for any one match may consist of more than one
line. When this option is set, the PCRE library is called in "multiline" mode.
There is a limit to the number of lines that can be matched, imposed by the way
that \fBpcregrep\fP buffers the input file as it scans it. However,
\fBpcregrep\fP ensures that at least 8K characters or the rest of the document
(whichever is the shorter) are available for forward matching, and similarly
the previous 8K characters (or all the previous characters, if fewer than 8K)
are guaranteed to be available for lookbehind assertions.
.TP
\fB-N\fP \fInewline-type\fP, \fB--newline=\fP\fInewline-type\fP
The PCRE library supports five different conventions for indicating
the ends of lines. They are the single-character sequences CR (carriage return)
and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
which recognizes any of the preceding three types, and an "any" convention, in
which any Unicode line ending sequence is assumed to end a line. The Unicode
sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
(formfeed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
PS (paragraph separator, U+2029).
.sp
When the PCRE library is built, a default line-ending sequence is specified.
This is normally the standard sequence for the operating system. Unless
otherwise specified by this option, \fBpcregrep\fP uses the library's default.
The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
makes it possible to use \fBpcregrep\fP on files that have come from other
environments without having to modify their line endings. If the data that is
being scanned does not agree with the convention set by this option,
\fBpcregrep\fP may behave in strange ways.
.TP
\fB-n\fP, \fB--line-number\fP
Precede each output line by its line number in the file, followed by a colon
for matching lines or a hyphen for context lines. If the filename is also being
output, it precedes the line number. This option is forced if
\fB--line-offsets\fP is used.
.TP
\fB-o\fP, \fB--only-matching\fP
Show only the part of the line that matched a pattern. In this mode, no
context is shown. That is, the \fB-A\fP, \fB-B\fP, and \fB-C\fP options are
ignored. If there is more than one match in a line, each of them is shown
separately. If \fB-o\fP is combined with \fB-v\fP (invert the sense of the
match to find non-matching lines), no output is generated, but the return code
is set appropriately. This option is mutually exclusive with
\fB--file-offsets\fP and \fB--line-offsets\fP.
.TP
\fB-q\fP, \fB--quiet\fP
Work quietly, that is, display nothing except error messages. The exit
status indicates whether or not any matches were found.
.TP
\fB-r\fP, \fB--recursive\fP
If any given path is a directory, recursively scan the files it contains,
taking note of any \fB--include\fP and \fB--exclude\fP settings. By default, a
directory is read as a normal file; in some operating systems this gives an
immediate end-of-file. This option is a shorthand for setting the \fB-d\fP
option to "recurse".
.TP
\fB-s\fP, \fB--no-messages\fP
Suppress error messages about non-existent or unreadable files. Such files are
quietly skipped. However, the return code is still 2, even if matches were
found in other files.
.TP
\fB-u\fP, \fB--utf-8\fP
Operate in UTF-8 mode. This option is available only if PCRE has been compiled
with UTF-8 support. Both patterns and subject lines must be valid strings of
UTF-8 characters.
.TP
\fB-V\fP, \fB--version\fP
Write the version numbers of \fBpcregrep\fP and the PCRE library that is being
used to the standard error stream.
.TP
\fB-v\fP, \fB--invert-match\fP
Invert the sense of the match, so that lines which do \fInot\fP match any of
the patterns are the ones that are found.
.TP
\fB-w\fP, \fB--word-regex\fP, \fB--word-regexp\fP
Force the patterns to match only whole words. This is equivalent to having \eb
at the start and end of the pattern.
.TP
\fB-x\fP, \fB--line-regex\fP, \fB--line-regexp\fP
Force the patterns to be anchored (each must start matching at the beginning of
a line) and in addition, require them to match entire lines. This is
equivalent to having ^ and $ characters at the start and end of each
alternative branch in every pattern.
.
.
.SH "ENVIRONMENT VARIABLES"
.rs
.sp
The environment variables \fBLC_ALL\fP and \fBLC_CTYPE\fP are examined, in that
order, for a locale. The first one that is set is used. This can be overridden
by the \fB--locale\fP option. If no locale is set, the PCRE library's default
(usually the "C" locale) is used.
.
.
.SH "NEWLINES"
.rs
.sp
The \fB-N\fP (\fB--newline\fP) option allows \fBpcregrep\fP to scan files with
different newline conventions from the default. However, the setting of this
option does not affect the way in which \fBpcregrep\fP writes information to
the standard error and output streams. It uses the string "\en" in C
\fBprintf()\fP calls to indicate newlines, relying on the C I/O library to
convert this to an appropriate sequence if the output is sent to a file.
.
.
.SH "OPTIONS COMPATIBILITY"
.rs
.sp
The majority of short and long forms of \fBpcregrep\fP's options are the same
as in the GNU \fBgrep\fP program. Any long option of the form
\fB--xxx-regexp\fP (GNU terminology) is also available as \fB--xxx-regex\fP
(PCRE terminology). However, the \fB--locale\fP, \fB-M\fP, \fB--multiline\fP,
\fB-u\fP, and \fB--utf-8\fP options are specific to \fBpcregrep\fP. If both the
\fB-c\fP and \fB-l\fP options are given, GNU grep lists only file names,
without counts, but \fBpcregrep\fP gives the counts.
.
.
.SH "OPTIONS WITH DATA"
.rs
.sp
There are four different ways in which an option with data can be specified.
If a short form option is used, the data may follow immediately, or in the next
command line item. For example:
.sp
  -f/some/file
  -f /some/file
.sp
If a long form option is used, the data may appear in the same command line
item, separated by an equals character, or (with one exception) it may appear
in the next command line item. For example:
.sp
  --file=/some/file
  --file /some/file
.sp
Note, however, that if you want to supply a file name beginning with ~ as data
in a shell command, and have the shell expand ~ to a home directory, you must
separate the file name from the option, because the shell does not treat ~
specially unless it is at the start of an item.
.P
The exception to the above is the \fB--colour\fP (or \fB--color\fP) option,
for which the data is optional. If this option does have data, it must be given
in the first form, using an equals character. Otherwise it will be assumed that
it has no data.
.
.
.SH "MATCHING ERRORS"
.rs
.sp
It is possible to supply a regular expression that takes a very long time to
fail to match certain lines. Such patterns normally involve nested indefinite
repeats, for example: (a+)*\ed when matched against a line of a's with no final
digit. The PCRE matching function has a resource limit that causes it to abort
in these circumstances. If this happens, \fBpcregrep\fP outputs an error
message and the line that caused the problem to the standard error stream. If
there are more than 20 such errors, \fBpcregrep\fP gives up.
.
.
.SH DIAGNOSTICS
.rs
.sp
Exit status is 0 if any matches were found, 1 if no matches were found, and 2
for syntax errors and non-existent or inaccessible files (even if matches were
found in other files) or too many matching errors. Using the \fB-s\fP option to
suppress error messages about inaccessible files does not affect the return
code.
.
.
.SH "SEE ALSO"
.rs
.sp
\fBpcrepattern\fP(3), \fBpcretest\fP(1).
.
.
.SH AUTHOR
.rs
.sp
.nf
Philip Hazel
University Computing Service
Cambridge CB2 3QH, England.
.fi
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 13 September 2009
Copyright (c) 1997-2009 University of Cambridge.
.fi
usr/share/man/man1/pcretest.1000064400000070612150403561450012012 0ustar00.TH PCRETEST 1
.SH NAME
pcretest - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
.rs
.sp
.B pcretest "[options] [source] [destination]"
.sp
\fBpcretest\fP was written as a test program for the PCRE regular expression
library itself, but it can also be used for experimenting with regular
expressions. This document describes the features of the test program; for
details of the regular expressions themselves, see the
.\" HREF
\fBpcrepattern\fP
.\"
documentation. For details of the PCRE library function calls and their
options, see the
.\" HREF
\fBpcreapi\fP
.\"
documentation.
.
.
.SH OPTIONS
.rs
.TP 10
\fB-b\fP
Behave as if each regex has the \fB/B\fP (show bytecode) modifier; the internal
form is output after compilation.
.TP 10
\fB-C\fP
Output the version number of the PCRE library, and all available information
about the optional features that are included, and then exit.
.TP 10
\fB-d\fP
Behave as if each regex has the \fB/D\fP (debug) modifier; the internal
form and information about the compiled pattern is output after compilation;
\fB-d\fP is equivalent to \fB-b -i\fP.
.TP 10
\fB-dfa\fP
Behave as if each data line contains the \eD escape sequence; this causes the
alternative matching function, \fBpcre_dfa_exec()\fP, to be used instead of the
standard \fBpcre_exec()\fP function (more detail is given below).
.TP 10
\fB-help\fP
Output a brief summary of these options and then exit.
.TP 10
\fB-i\fP
Behave as if each regex has the \fB/I\fP modifier; information about the
compiled pattern is given after compilation.
.TP 10
\fB-M\fP
Behave as if each data line contains the \eM escape sequence; this causes
PCRE to discover the minimum MATCH_LIMIT and MATCH_LIMIT_RECURSION settings by
calling \fBpcre_exec()\fP repeatedly with different limits.
.TP 10
\fB-m\fP
Output the size of each compiled pattern after it has been compiled. This is
equivalent to adding \fB/M\fP to each regular expression. For compatibility
with earlier versions of pcretest, \fB-s\fP is a synonym for \fB-m\fP.
.TP 10
\fB-o\fP \fIosize\fP
Set the number of elements in the output vector that is used when calling
\fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP to be \fIosize\fP. The default value
is 45, which is enough for 14 capturing subexpressions for \fBpcre_exec()\fP or
22 different matches for \fBpcre_dfa_exec()\fP. The vector size can be
changed for individual matching calls by including \eO in the data line (see
below).
.TP 10
\fB-p\fP
Behave as if each regex has the \fB/P\fP modifier; the POSIX wrapper API is
used to call PCRE. None of the other options has any effect when \fB-p\fP is
set.
.TP 10
\fB-q\fP
Do not output the version number of \fBpcretest\fP at the start of execution.
.TP 10
\fB-S\fP \fIsize\fP
On Unix-like systems, set the size of the runtime stack to \fIsize\fP
megabytes.
.TP 10
\fB-t\fP
Run each compile, study, and match many times with a timer, and output
resulting time per compile or match (in milliseconds). Do not set \fB-m\fP with
\fB-t\fP, because you will then get the size output a zillion times, and the
timing will be distorted. You can control the number of iterations that are
used for timing by following \fB-t\fP with a number (as a separate item on the
command line). For example, "-t 1000" would iterate 1000 times. The default is
to iterate 500000 times.
.TP 10
\fB-tm\fP
This is like \fB-t\fP except that it times only the matching phase, not the
compile or study phases.
.
.
.SH DESCRIPTION
.rs
.sp
If \fBpcretest\fP is given two filename arguments, it reads from the first and
writes to the second. If it is given only one filename argument, it reads from
that file and writes to stdout. Otherwise, it reads from stdin and writes to
stdout, and prompts for each line of input, using "re>" to prompt for regular
expressions, and "data>" to prompt for data lines.
.P
When \fBpcretest\fP is built, a configuration option can specify that it should
be linked with the \fBlibreadline\fP library. When this is done, if the input
is from a terminal, it is read using the \fBreadline()\fP function. This
provides line-editing and history facilities. The output from the \fB-help\fP
option states whether or not \fBreadline()\fP will be used.
.P
The program handles any number of sets of input on a single input file. Each
set starts with a regular expression, and continues with any number of data
lines to be matched against the pattern.
.P
Each data line is matched separately and independently. If you want to do
multi-line matches, you have to use the \en escape sequence (or \er or \er\en,
etc., depending on the newline setting) in a single line of input to encode the
newline sequences. There is no limit on the length of data lines; the input
buffer is automatically extended if it is too small.
.P
An empty line signals the end of the data lines, at which point a new regular
expression is read. The regular expressions are given enclosed in any
non-alphanumeric delimiters other than backslash, for example:
.sp
  /(a|bc)x+yz/
.sp
White space before the initial delimiter is ignored. A regular expression may
be continued over several input lines, in which case the newline characters are
included within it. It is possible to include the delimiter within the pattern
by escaping it, for example
.sp
  /abc\e/def/
.sp
If you do so, the escape and the delimiter form part of the pattern, but since
delimiters are always non-alphanumeric, this does not affect its interpretation.
If the terminating delimiter is immediately followed by a backslash, for
example,
.sp
  /abc/\e
.sp
then a backslash is added to the end of the pattern. This is done to provide a
way of testing the error condition that arises if a pattern finishes with a
backslash, because
.sp
  /abc\e/
.sp
is interpreted as the first line of a pattern that starts with "abc/", causing
pcretest to read the next line as a continuation of the regular expression.
.
.
.SH "PATTERN MODIFIERS"
.rs
.sp
A pattern may be followed by any number of modifiers, which are mostly single
characters. Following Perl usage, these are referred to below as, for example,
"the \fB/i\fP modifier", even though the delimiter of the pattern need not
always be a slash, and no slash is used when writing modifiers. Whitespace may
appear between the final pattern delimiter and the first modifier, and between
the modifiers themselves.
.P
The \fB/i\fP, \fB/m\fP, \fB/s\fP, and \fB/x\fP modifiers set the PCRE_CASELESS,
PCRE_MULTILINE, PCRE_DOTALL, or PCRE_EXTENDED options, respectively, when
\fBpcre_compile()\fP is called. These four modifier letters have the same
effect as they do in Perl. For example:
.sp
  /caseless/i
.sp
The following table shows additional modifiers for setting PCRE options that do
not correspond to anything in Perl:
.sp
  \fB/A\fP              PCRE_ANCHORED
  \fB/C\fP              PCRE_AUTO_CALLOUT
  \fB/E\fP              PCRE_DOLLAR_ENDONLY
  \fB/f\fP              PCRE_FIRSTLINE
  \fB/J\fP              PCRE_DUPNAMES
  \fB/N\fP              PCRE_NO_AUTO_CAPTURE
  \fB/U\fP              PCRE_UNGREEDY
  \fB/X\fP              PCRE_EXTRA
  \fB/<JS>\fP           PCRE_JAVASCRIPT_COMPAT
  \fB/<cr>\fP           PCRE_NEWLINE_CR
  \fB/<lf>\fP           PCRE_NEWLINE_LF
  \fB/<crlf>\fP         PCRE_NEWLINE_CRLF
  \fB/<anycrlf>\fP      PCRE_NEWLINE_ANYCRLF
  \fB/<any>\fP          PCRE_NEWLINE_ANY
  \fB/<bsr_anycrlf>\fP  PCRE_BSR_ANYCRLF
  \fB/<bsr_unicode>\fP  PCRE_BSR_UNICODE
.sp
Those specifying line ending sequences are literal strings as shown, but the
letters can be in either case. This example sets multiline matching with CRLF
as the line ending sequence:
.sp
  /^abc/m<crlf>
.sp
Details of the meanings of these PCRE options are given in the
.\" HREF
\fBpcreapi\fP
.\"
documentation.
.
.
.SS "Finding all matches in a string"
.rs
.sp
Searching for all possible matches within each subject string can be requested
by the \fB/g\fP or \fB/G\fP modifier. After finding a match, PCRE is called
again to search the remainder of the subject string. The difference between
\fB/g\fP and \fB/G\fP is that the former uses the \fIstartoffset\fP argument to
\fBpcre_exec()\fP to start searching at a new point within the entire string
(which is in effect what Perl does), whereas the latter passes over a shortened
substring. This makes a difference to the matching process if the pattern
begins with a lookbehind assertion (including \eb or \eB).
.P
If any call to \fBpcre_exec()\fP in a \fB/g\fP or \fB/G\fP sequence matches an
empty string, the next call is done with the PCRE_NOTEMPTY_ATSTART and
PCRE_ANCHORED flags set in order to search for another, non-empty, match at the
same point. If this second match fails, the start offset is advanced by one
character, and the normal match is retried. This imitates the way Perl handles
such cases when using the \fB/g\fP modifier or the \fBsplit()\fP function.
.
.
.SS "Other modifiers"
.rs
.sp
There are yet more modifiers for controlling the way \fBpcretest\fP
operates.
.P
The \fB/+\fP modifier requests that as well as outputting the substring that
matched the entire pattern, pcretest should in addition output the remainder of
the subject string. This is useful for tests where the subject contains
multiple copies of the same substring.
.P
The \fB/B\fP modifier is a debugging feature. It requests that \fBpcretest\fP
output a representation of the compiled byte code after compilation. Normally
this information contains length and offset values; however, if \fB/Z\fP is
also present, this data is replaced by spaces. This is a special feature for
use in the automatic test scripts; it ensures that the same output is generated
for different internal link sizes.
.P
The \fB/L\fP modifier must be followed directly by the name of a locale, for
example,
.sp
  /pattern/Lfr_FR
.sp
For this reason, it must be the last modifier. The given locale is set,
\fBpcre_maketables()\fP is called to build a set of character tables for the
locale, and this is then passed to \fBpcre_compile()\fP when compiling the
regular expression. Without an \fB/L\fP modifier, NULL is passed as the tables
pointer; that is, \fB/L\fP applies only to the expression on which it appears.
.P
The \fB/I\fP modifier requests that \fBpcretest\fP output information about the
compiled pattern (whether it is anchored, has a fixed first character, and
so on). It does this by calling \fBpcre_fullinfo()\fP after compiling a
pattern. If the pattern is studied, the results of that are also output.
.P
The \fB/D\fP modifier is a PCRE debugging feature, and is equivalent to
\fB/BI\fP, that is, both the \fB/B\fP and the \fB/I\fP modifiers.
.P
The \fB/F\fP modifier causes \fBpcretest\fP to flip the byte order of the
fields in the compiled pattern that contain 2-byte and 4-byte numbers. This
facility is for testing the feature in PCRE that allows it to execute patterns
that were compiled on a host with a different endianness. This feature is not
available when the POSIX interface to PCRE is being used, that is, when the
\fB/P\fP pattern modifier is specified. See also the section about saving and
reloading compiled patterns below.
.P
The \fB/S\fP modifier causes \fBpcre_study()\fP to be called after the
expression has been compiled, and the results used when the expression is
matched.
.P
The \fB/M\fP modifier causes the size of memory block used to hold the compiled
pattern to be output.
.P
The \fB/P\fP modifier causes \fBpcretest\fP to call PCRE via the POSIX wrapper
API rather than its native API. When this is done, all other modifiers except
\fB/i\fP, \fB/m\fP, and \fB/+\fP are ignored. REG_ICASE is set if \fB/i\fP is
present, and REG_NEWLINE is set if \fB/m\fP is present. The wrapper functions
force PCRE_DOLLAR_ENDONLY always, and PCRE_DOTALL unless REG_NEWLINE is set.
.P
The \fB/8\fP modifier causes \fBpcretest\fP to call PCRE with the PCRE_UTF8
option set. This turns on support for UTF-8 character handling in PCRE,
provided that it was compiled with this support enabled. This modifier also
causes any non-printing characters in output strings to be printed using the
\ex{hh...} notation if they are valid UTF-8 sequences.
.P
If the \fB/?\fP modifier is used with \fB/8\fP, it causes \fBpcretest\fP to
call \fBpcre_compile()\fP with the PCRE_NO_UTF8_CHECK option, to suppress the
checking of the string for UTF-8 validity.
.
.
.SH "DATA LINES"
.rs
.sp
Before each data line is passed to \fBpcre_exec()\fP, leading and trailing
whitespace is removed, and it is then scanned for \e escapes. Some of these are
pretty esoteric features, intended for checking out some of the more
complicated features of PCRE. If you are just testing "ordinary" regular
expressions, you probably don't need any of these. The following escapes are
recognized:
.sp
  \ea         alarm (BEL, \ex07)
  \eb         backspace (\ex08)
  \ee         escape (\ex1b)
  \ef         formfeed (\ex0c)
  \en         newline (\ex0a)
.\" JOIN
  \eqdd       set the PCRE_MATCH_LIMIT limit to dd
               (any number of digits)
  \er         carriage return (\ex0d)
  \et         tab (\ex09)
  \ev         vertical tab (\ex0b)
  \ennn       octal character (up to 3 octal digits)
  \exhh       hexadecimal character (up to 2 hex digits)
.\" JOIN
  \ex{hh...}  hexadecimal character, any number of digits
               in UTF-8 mode
.\" JOIN
  \eA         pass the PCRE_ANCHORED option to \fBpcre_exec()\fP
               or \fBpcre_dfa_exec()\fP
.\" JOIN
  \eB         pass the PCRE_NOTBOL option to \fBpcre_exec()\fP
               or \fBpcre_dfa_exec()\fP
.\" JOIN
  \eCdd       call pcre_copy_substring() for substring dd
               after a successful match (number less than 32)
.\" JOIN
  \eCname     call pcre_copy_named_substring() for substring
               "name" after a successful match (name termin-
               ated by next non-alphanumeric character)
.\" JOIN
  \eC+        show the current captured substrings at callout
               time
  \eC-        do not supply a callout function
.\" JOIN
  \eC!n       return 1 instead of 0 when callout number n is
               reached
.\" JOIN
  \eC!n!m     return 1 instead of 0 when callout number n is
               reached for the mth time
.\" JOIN
  \eC*n       pass the number n (may be negative) as callout
               data; this is used as the callout return value
  \eD         use the \fBpcre_dfa_exec()\fP match function
  \eF         only shortest match for \fBpcre_dfa_exec()\fP
.\" JOIN
  \eGdd       call pcre_get_substring() for substring dd
               after a successful match (number less than 32)
.\" JOIN
  \eGname     call pcre_get_named_substring() for substring
               "name" after a successful match (name termin-
               ated by next non-alphanumeric character)
.\" JOIN
  \eL         call pcre_get_substringlist() after a
               successful match
.\" JOIN
  \eM         discover the minimum MATCH_LIMIT and
               MATCH_LIMIT_RECURSION settings
.\" JOIN
  \eN         pass the PCRE_NOTEMPTY option to \fBpcre_exec()\fP
               or \fBpcre_dfa_exec()\fP; if used twice, pass the
               PCRE_NOTEMPTY_ATSTART option
.\" JOIN
  \eOdd       set the size of the output vector passed to
               \fBpcre_exec()\fP to dd (any number of digits)
.\" JOIN
  \eP         pass the PCRE_PARTIAL_SOFT option to \fBpcre_exec()\fP
               or \fBpcre_dfa_exec()\fP; if used twice, pass the
               PCRE_PARTIAL_HARD option
.\" JOIN
  \eQdd       set the PCRE_MATCH_LIMIT_RECURSION limit to dd
               (any number of digits)
  \eR         pass the PCRE_DFA_RESTART option to \fBpcre_dfa_exec()\fP
  \eS         output details of memory get/free calls during matching
.\" JOIN
  \eY         pass the PCRE_NO_START_OPTIMIZE option to \fBpcre_exec()\fP
               or \fBpcre_dfa_exec()\fP
.\" JOIN
  \eZ         pass the PCRE_NOTEOL option to \fBpcre_exec()\fP
               or \fBpcre_dfa_exec()\fP
.\" JOIN
  \e?         pass the PCRE_NO_UTF8_CHECK option to
               \fBpcre_exec()\fP or \fBpcre_dfa_exec()\fP
  \e>dd       start the match at offset dd (any number of digits);
.\" JOIN
               this sets the \fIstartoffset\fP argument for \fBpcre_exec()\fP
               or \fBpcre_dfa_exec()\fP
.\" JOIN
  \e<cr>      pass the PCRE_NEWLINE_CR option to \fBpcre_exec()\fP
               or \fBpcre_dfa_exec()\fP
.\" JOIN
  \e<lf>      pass the PCRE_NEWLINE_LF option to \fBpcre_exec()\fP
               or \fBpcre_dfa_exec()\fP
.\" JOIN
  \e<crlf>    pass the PCRE_NEWLINE_CRLF option to \fBpcre_exec()\fP
               or \fBpcre_dfa_exec()\fP
.\" JOIN
  \e<anycrlf> pass the PCRE_NEWLINE_ANYCRLF option to \fBpcre_exec()\fP
               or \fBpcre_dfa_exec()\fP
.\" JOIN
  \e<any>     pass the PCRE_NEWLINE_ANY option to \fBpcre_exec()\fP
               or \fBpcre_dfa_exec()\fP
.sp
The escapes that specify line ending sequences are literal strings, exactly as
shown. No more than one newline setting should be present in any data line.
.P
A backslash followed by anything else just escapes the anything else. If
the very last character is a backslash, it is ignored. This gives a way of
passing an empty line as data, since a real empty line terminates the data
input.
.P
If \eM is present, \fBpcretest\fP calls \fBpcre_exec()\fP several times, with
different values in the \fImatch_limit\fP and \fImatch_limit_recursion\fP
fields of the \fBpcre_extra\fP data structure, until it finds the minimum
numbers for each parameter that allow \fBpcre_exec()\fP to complete. The
\fImatch_limit\fP number is a measure of the amount of backtracking that takes
place, and checking it out can be instructive. For most simple matches, the
number is quite small, but for patterns with very large numbers of matching
possibilities, it can become large very quickly with increasing length of
subject string. The \fImatch_limit_recursion\fP number is a measure of how much
stack (or, if PCRE is compiled with NO_RECURSE, how much heap) memory is needed
to complete the match attempt.
.P
When \eO is used, the value specified may be higher or lower than the size set
by the \fB-O\fP command line option (or defaulted to 45); \eO applies only to
the call of \fBpcre_exec()\fP for the line in which it appears.
.P
If the \fB/P\fP modifier was present on the pattern, causing the POSIX wrapper
API to be used, the only option-setting sequences that have any effect are \eB
and \eZ, causing REG_NOTBOL and REG_NOTEOL, respectively, to be passed to
\fBregexec()\fP.
.P
The use of \ex{hh...} to represent UTF-8 characters is not dependent on the use
of the \fB/8\fP modifier on the pattern. It is recognized always. There may be
any number of hexadecimal digits inside the braces. The result is from one to
six bytes, encoded according to the original UTF-8 rules of RFC 2279. This
allows for values in the range 0 to 0x7FFFFFFF. Note that not all of those are
valid Unicode code points, or indeed valid UTF-8 characters according to the
later rules in RFC 3629.
.
.
.SH "THE ALTERNATIVE MATCHING FUNCTION"
.rs
.sp
By default, \fBpcretest\fP uses the standard PCRE matching function,
\fBpcre_exec()\fP to match each data line. From release 6.0, PCRE supports an
alternative matching function, \fBpcre_dfa_exec()\fP, which operates in a
different way, and has some restrictions. The differences between the two
functions are described in the
.\" HREF
\fBpcrematching\fP
.\"
documentation.
.P
If a data line contains the \eD escape sequence, or if the command line
contains the \fB-dfa\fP option, the alternative matching function is called.
This function finds all possible matches at a given point. If, however, the \eF
escape sequence is present in the data line, it stops after the first match is
found. This is always the shortest possible match.
.
.
.SH "DEFAULT OUTPUT FROM PCRETEST"
.rs
.sp
This section describes the output when the normal matching function,
\fBpcre_exec()\fP, is being used.
.P
When a match succeeds, pcretest outputs the list of captured substrings that
\fBpcre_exec()\fP returns, starting with number 0 for the string that matched
the whole pattern. Otherwise, it outputs "No match" when the return is
PCRE_ERROR_NOMATCH, and "Partial match:" followed by the partially matching
substring when \fBpcre_exec()\fP returns PCRE_ERROR_PARTIAL. For any other
returns, it outputs the PCRE negative error number. Here is an example of an
interactive \fBpcretest\fP run.
.sp
  $ pcretest
  PCRE version 7.0 30-Nov-2006
.sp
    re> /^abc(\ed+)/
  data> abc123
   0: abc123
   1: 123
  data> xyz
  No match
.sp
Note that unset capturing substrings that are not followed by one that is set
are not returned by \fBpcre_exec()\fP, and are not shown by \fBpcretest\fP. In
the following example, there are two capturing substrings, but when the first
data line is matched, the second, unset substring is not shown. An "internal"
unset substring is shown as "<unset>", as for the second data line.
.sp
    re> /(a)|(b)/
  data> a
   0: a
   1: a
  data> b
   0: b
   1: <unset>
   2: b
.sp
If the strings contain any non-printing characters, they are output as \exhh
escapes, or as \ex{...} escapes if the \fB/8\fP modifier was present on the
pattern. See below for the definition of non-printing characters. If the
pattern has the \fB/+\fP modifier, the output for substring 0 is followed by
the rest of the subject string, identified by "0+" like this:
.sp
    re> /cat/+
  data> cataract
   0: cat
   0+ aract
.sp
If the pattern has the \fB/g\fP or \fB/G\fP modifier, the results of successive
matching attempts are output in sequence, like this:
.sp
    re> /\eBi(\ew\ew)/g
  data> Mississippi
   0: iss
   1: ss
   0: iss
   1: ss
   0: ipp
   1: pp
.sp
"No match" is output only if the first match attempt fails.
.P
If any of the sequences \fB\eC\fP, \fB\eG\fP, or \fB\eL\fP are present in a
data line that is successfully matched, the substrings extracted by the
convenience functions are output with C, G, or L after the string number
instead of a colon. This is in addition to the normal full list. The string
length (that is, the return from the extraction function) is given in
parentheses after each string for \fB\eC\fP and \fB\eG\fP.
.P
Note that whereas patterns can be continued over several lines (a plain ">"
prompt is used for continuations), data lines may not. However newlines can be
included in data by means of the \en escape (or \er, \er\en, etc., depending on
the newline sequence setting).
.
.
.
.SH "OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION"
.rs
.sp
When the alternative matching function, \fBpcre_dfa_exec()\fP, is used (by
means of the \eD escape sequence or the \fB-dfa\fP command line option), the
output consists of a list of all the matches that start at the first point in
the subject where there is at least one match. For example:
.sp
    re> /(tang|tangerine|tan)/
  data> yellow tangerine\eD
   0: tangerine
   1: tang
   2: tan
.sp
(Using the normal matching function on this data finds only "tang".) The
longest matching string is always given first (and numbered zero). After a
PCRE_ERROR_PARTIAL return, the output is "Partial match:", followed by the
partially matching substring.
.P
If \fB/g\fP is present on the pattern, the search for further matches resumes
at the end of the longest match. For example:
.sp
    re> /(tang|tangerine|tan)/g
  data> yellow tangerine and tangy sultana\eD
   0: tangerine
   1: tang
   2: tan
   0: tang
   1: tan
   0: tan
.sp
Since the matching function does not support substring capture, the escape
sequences that are concerned with captured substrings are not relevant.
.
.
.SH "RESTARTING AFTER A PARTIAL MATCH"
.rs
.sp
When the alternative matching function has given the PCRE_ERROR_PARTIAL return,
indicating that the subject partially matched the pattern, you can restart the
match with additional subject data by means of the \eR escape sequence. For
example:
.sp
    re> /^\ed?\ed(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\ed\ed$/
  data> 23ja\eP\eD
  Partial match: 23ja
  data> n05\eR\eD
   0: n05
.sp
For further information about partial matching, see the
.\" HREF
\fBpcrepartial\fP
.\"
documentation.
.
.
.SH CALLOUTS
.rs
.sp
If the pattern contains any callout requests, \fBpcretest\fP's callout function
is called during matching. This works with both matching functions. By default,
the called function displays the callout number, the start and current
positions in the text at the callout time, and the next pattern item to be
tested. For example, the output
.sp
  --->pqrabcdef
    0    ^  ^     \ed
.sp
indicates that callout number 0 occurred for a match attempt starting at the
fourth character of the subject string, when the pointer was at the seventh
character of the data, and when the next pattern item was \ed. Just one
circumflex is output if the start and current positions are the same.
.P
Callouts numbered 255 are assumed to be automatic callouts, inserted as a
result of the \fB/C\fP pattern modifier. In this case, instead of showing the
callout number, the offset in the pattern, preceded by a plus, is output. For
example:
.sp
    re> /\ed?[A-E]\e*/C
  data> E*
  --->E*
   +0 ^      \ed?
   +3 ^      [A-E]
   +8 ^^     \e*
  +10 ^ ^
   0: E*
.sp
The callout function in \fBpcretest\fP returns zero (carry on matching) by
default, but you can use a \eC item in a data line (as described above) to
change this.
.P
Inserting callouts can be helpful when using \fBpcretest\fP to check
complicated regular expressions. For further information about callouts, see
the
.\" HREF
\fBpcrecallout\fP
.\"
documentation.
.
.
.
.SH "NON-PRINTING CHARACTERS"
.rs
.sp
When \fBpcretest\fP is outputting text in the compiled version of a pattern,
bytes other than 32-126 are always treated as non-printing characters and are
therefore shown as hex escapes.
.P
When \fBpcretest\fP is outputting text that is a matched part of a subject
string, it behaves in the same way, unless a different locale has been set for
the pattern (using the \fB/L\fP modifier). In this case, the \fBisprint()\fP
function is used to distinguish printing and non-printing characters.
.
.
.
.SH "SAVING AND RELOADING COMPILED PATTERNS"
.rs
.sp
The facilities described in this section are not available when the POSIX
interface to PCRE is being used, that is, when the \fB/P\fP pattern modifier is
specified.
.P
When the POSIX interface is not in use, you can cause \fBpcretest\fP to write a
compiled pattern to a file, by following the modifiers with > and a file name.
For example:
.sp
  /pattern/im >/some/file
.sp
See the
.\" HREF
\fBpcreprecompile\fP
.\"
documentation for a discussion about saving and re-using compiled patterns.
.P
The data that is written is binary. The first eight bytes are the length of the
compiled pattern data followed by the length of the optional study data, each
written as four bytes in big-endian order (most significant byte first). If
there is no study data (either the pattern was not studied, or studying did not
return any data), the second length is zero. The lengths are followed by an
exact copy of the compiled pattern. If there is additional study data, this
follows immediately after the compiled pattern. After writing the file,
\fBpcretest\fP expects to read a new pattern.
.P
A saved pattern can be reloaded into \fBpcretest\fP by specifying < and a file
name instead of a pattern. The name of the file must not contain a < character,
as otherwise \fBpcretest\fP will interpret the line as a pattern delimited by <
characters.
For example:
.sp
   re> </some/file
  Compiled regex loaded from /some/file
  No study data
.sp
When the pattern has been loaded, \fBpcretest\fP proceeds to read data lines in
the usual way.
.P
You can copy a file written by \fBpcretest\fP to a different host and reload it
there, even if the new host has opposite endianness to the one on which the
pattern was compiled. For example, you can compile on an i86 machine and run on
a SPARC machine.
.P
File names for saving and reloading can be absolute or relative, but note that
the shell facility of expanding a file name that starts with a tilde (~) is not
available.
.P
The ability to save and reload files in \fBpcretest\fP is intended for testing
and experimentation. It is not intended for production use because only a
single pattern can be written to a file. Furthermore, there is no facility for
supplying custom character tables for use with a reloaded pattern. If the
original pattern was compiled with custom tables, an attempt to match a subject
string using a reloaded pattern is likely to cause \fBpcretest\fP to crash.
Finally, if you attempt to load a file that is not in the correct format, the
result is undefined.
.
.
.SH "SEE ALSO"
.rs
.sp
\fBpcre\fP(3), \fBpcreapi\fP(3), \fBpcrecallout\fP(3), \fBpcrematching\fP(3),
\fBpcrepartial\fP(3), \fBpcrepattern\fP(3), \fBpcreprecompile\fP(3).
.
.
.SH AUTHOR
.rs
.sp
.nf
Philip Hazel
University Computing Service
Cambridge CB2 3QH, England.
.fi
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 26 September 2009
Copyright (c) 1997-2009 University of Cambridge.
.fi
usr/share/man/man1/pcre-config.1000064400000003555150403561450012357 0ustar00.TH PCRE-CONFIG 1
.SH NAME
pcre-config - program to return PCRE configuration
.SH SYNOPSIS
.rs
.sp
.B pcre-config  [--prefix] [--exec-prefix] [--version] [--libs]
.ti +5n
.B              [--libs-posix] [--cflags] [--cflags-posix]
.
.
.SH DESCRIPTION
.rs
.sp
\fBpcre-config\fP returns the configuration of the installed PCRE
libraries and the options required to compile a program to use them.
.
.
.SH OPTIONS
.rs
.TP 10
\fB--prefix\fP
Writes the directory prefix used in the PCRE installation for architecture
independent files (\fI/usr\fP on many systems, \fI/usr/local\fP on some
systems) to the standard output.
.TP 10
\fB--exec-prefix\fP
Writes the directory prefix used in the PCRE installation for architecture
dependent files (normally the same as \fB--prefix\fP) to the standard output.
.TP 10
\fB--version\fP
Writes the version number of the installed PCRE libraries to the standard
output.
.TP 10
\fB--libs\fP
Writes to the standard output the command line options required to link
with PCRE (\fB-lpcre\fP on many systems).
.TP 10
\fB--libs-posix\fP
Writes to the standard output the command line options required to link with
the PCRE posix emulation library (\fB-lpcreposix\fP \fB-lpcre\fP on many
systems).
.TP 10
\fB--cflags\fP
Writes to the standard output the command line options required to compile
files that use PCRE (this may include some \fB-I\fP options, but is blank on
many systems).
.TP 10
\fB--cflags-posix\fP
Writes to the standard output the command line options required to compile
files that use the PCRE posix emulation library (this may include some \fB-I\fP
options, but is blank on many systems).
.
.
.SH "SEE ALSO"
.rs
.sp
\fBpcre(3)\fP
.
.
.SH AUTHOR
.rs
.sp
This manual page was originally written by Mark Baker for the Debian GNU/Linux
system. It has been slightly revised as a generic PCRE man page.
.
.
.SH REVISION
.rs
.sp
.nf
Last updated: 18 April 2007
.fi
usr/share/doc/alt-pcre802/ChangeLog000064400000550627150403561450012761 0ustar00ChangeLog for PCRE
------------------

Version 8.02 19-Mar-2010
------------------------

1.  The Unicode data tables have been updated to Unicode 5.2.0.

2.  Added the option --libs-cpp to pcre-config, but only when C++ support is
    configured.

3.  Updated the licensing terms in the pcregexp.pas file, as agreed with the
    original author of that file, following a query about its status.

4.  On systems that do not have stdint.h (e.g. Solaris), check for and include
    inttypes.h instead. This fixes a bug that was introduced by change 8.01/8.

5.  A pattern such as (?&t)*+(?(DEFINE)(?<t>.)) which has a possessive
    quantifier applied to a forward-referencing subroutine call, could compile
    incorrect code or give the error "internal error: previously-checked
    referenced subpattern not found".

6.  Both MS Visual Studio and Symbian OS have problems with initializing
    variables to point to external functions. For these systems, therefore,
    pcre_malloc etc. are now initialized to local functions that call the
    relevant global functions.

7.  There were two entries missing in the vectors called coptable and poptable
    in pcre_dfa_exec.c. This could lead to memory accesses outside the vectors.
    I've fixed the data, and added a kludgy way of testing at compile time that
    the lengths are correct (equal to the number of opcodes).

8.  Following on from 7, I added a similar kludge to check the length of the
    eint vector in pcreposix.c.

9.  Error texts for pcre_compile() are held as one long string to avoid too
    much relocation at load time. To find a text, the string is searched,
    counting zeros. There was no check for running off the end of the string,
    which could happen if a new error number was added without updating the
    string.

10. \K gave a compile-time error if it appeared in a lookbehind assertion.

11. \K was not working if it appeared in an atomic group or in a group that
    was called as a "subroutine", or in an assertion. Perl 5.11 documents that
    \K is "not well defined" if used in an assertion. PCRE now accepts it if
    the assertion is positive, but not if it is negative.

12. Change 11 fortuitously reduced the size of the stack frame used in the
    "match()" function of pcre_exec.c by one pointer. Forthcoming
    implementation of support for (*MARK) will need an extra pointer on the
    stack; I have reserved it now, so that the stack frame size does not
    decrease.

13. A pattern such as (?P<L1>(?P<L2>0)|(?P>L2)(?P>L1)) in which the only other
    item in a branch that calls a recursion is a subroutine call - as in the
    second branch in the above example - was incorrectly given the compile-
    time error "recursive call could loop indefinitely" because pcre_compile()
    was not correctly checking the subroutine for matching a non-empty string.

14. The checks for overrunning compiling workspace could trigger after an
    overrun had occurred. This is a "should never occur" error, but it can be
    triggered by pathological patterns such as hundreds of nested parentheses.
    The checks now trigger 100 bytes before the end of the workspace.

15. Fix typo in configure.ac: "srtoq" should be "strtoq".


Version 8.01 19-Jan-2010
------------------------

1.  If a pattern contained a conditional subpattern with only one branch (in
    particular, this includes all (*DEFINE) patterns), a call to pcre_study()
    computed the wrong minimum data length (which is of course zero for such
    subpatterns). This could cause incorrect "no match" results.

2.  For patterns such as (?i)a(?-i)b|c where an option setting at the start of
    the pattern is reset in the first branch, pcre_compile() failed with
    "internal error: code overflow at offset...". This happened only when
    the reset was to the original external option setting. (An optimization
    abstracts leading options settings into an external setting, which was the
    cause of this.)

3.  A pattern such as ^(?!a(*SKIP)b) where a negative assertion contained one
    of the verbs SKIP, PRUNE, or COMMIT, did not work correctly. When the
    assertion pattern did not match (meaning that the assertion was true), it
    was incorrectly treated as false if the SKIP had been reached during the
    matching. This also applied to assertions used as conditions.

4.  If an item that is not supported by pcre_dfa_exec() was encountered in an
    assertion subpattern, including such a pattern used as a condition,
    unpredictable results occurred, instead of the error return
    PCRE_ERROR_DFA_UITEM.

5.  The C++ GlobalReplace function was not working like Perl for the special
    situation when an empty string is matched. It now does the fancy magic
    stuff that is necessary.

6.  In pcre_internal.h, obsolete includes to setjmp.h and stdarg.h have been
    removed. (These were left over from very, very early versions of PCRE.)

7.  Some cosmetic changes to the code to make life easier when compiling it
    as part of something else:

    (a) Change DEBUG to PCRE_DEBUG.

    (b) In pcre_compile(), rename the member of the "branch_chain" structure
        called "current" as "current_branch", to prevent a collision with the
        Linux macro when compiled as a kernel module.

    (c) In pcre_study(), rename the function set_bit() as set_table_bit(), to
        prevent a collision with the Linux macro when compiled as a kernel
        module.

8.  In pcre_compile() there are some checks for integer overflows that used to
    cast potentially large values to (double). This has been changed so that
    when building, a check for int64_t is made, and if it is found, it is used
    instead, thus avoiding the use of floating point arithmetic. (There is no
    other use of FP in PCRE.) If int64_t is not found, the fallback is to
    double.

9.  Added two casts to avoid signed/unsigned warnings from VS Studio Express
    2005 (difference between two addresses compared to an unsigned value).

10. Change the standard AC_CHECK_LIB test for libbz2 in configure.ac to a
    custom one, because of the following reported problem in Windows:

      - libbz2 uses the Pascal calling convention (WINAPI) for the functions
          under Win32.
      - The standard autoconf AC_CHECK_LIB fails to include "bzlib.h",
          therefore missing the function definition.
      - The compiler thus generates a "C" signature for the test function.
      - The linker fails to find the "C" function.
      - PCRE fails to configure if asked to do so against libbz2.

11. When running libtoolize from libtool-2.2.6b as part of autogen.sh, these
    messages were output:

      Consider adding `AC_CONFIG_MACRO_DIR([m4])' to configure.ac and
      rerunning libtoolize, to keep the correct libtool macros in-tree.
      Consider adding `-I m4' to ACLOCAL_AMFLAGS in Makefile.am.

    I have done both of these things.

12. Although pcre_dfa_exec() does not use nearly as much stack as pcre_exec()
    most of the time, it *can* run out if it is given a pattern that contains a
    runaway infinite recursion. I updated the discussion in the pcrestack man
    page.

13. Now that we have gone to the x.xx style of version numbers, the minor
    version may start with zero. Using 08 or 09 is a bad idea because users
    might check the value of PCRE_MINOR in their code, and 08 or 09 may be
    interpreted as invalid octal numbers. I've updated the previous comment in
    configure.ac, and also added a check that gives an error if 08 or 09 are
    used.

14. Change 8.00/11 was not quite complete: code had been accidentally omitted,
    causing partial matching to fail when the end of the subject matched \W
    in a UTF-8 pattern where \W was quantified with a minimum of 3.

15. There were some discrepancies between the declarations in pcre_internal.h
    of _pcre_is_newline(), _pcre_was_newline(), and _pcre_valid_utf8() and
    their definitions. The declarations used "const uschar *" and the
    definitions used USPTR. Even though USPTR is normally defined as "const
    unsigned char *" (and uschar is typedeffed as "unsigned char"), it was
    reported that: "This difference in casting confuses some C++ compilers, for
    example, SunCC recognizes above declarations as different functions and
    generates broken code for hbpcre." I have changed the declarations to use
    USPTR.

16. GNU libtool is named differently on some systems. The autogen.sh script now
    tries several variants such as glibtoolize (MacOSX) and libtoolize1x
    (FreeBSD).

17. Applied Craig's patch that fixes an HP aCC compile error in pcre 8.00
    (strtoXX undefined when compiling pcrecpp.cc). The patch contains this
    comment: "Figure out how to create a longlong from a string: strtoll and
    equivalent. It's not enough to call AC_CHECK_FUNCS: hpux has a strtoll, for
    instance, but it only takes 2 args instead of 3!"

18. A subtle bug concerned with back references has been fixed by a change of
    specification, with a corresponding code fix. A pattern such as
    ^(xa|=?\1a)+$ which contains a back reference inside the group to which it
    refers, was giving matches when it shouldn't. For example, xa=xaaa would
    match that pattern. Interestingly, Perl (at least up to 5.11.3) has the
    same bug. Such groups have to be quantified to be useful, or contained
    inside another quantified group. (If there's no repetition, the reference
    can never match.) The problem arises because, having left the group and
    moved on to the rest of the pattern, a later failure that backtracks into
    the group uses the captured value from the final iteration of the group
    rather than the correct earlier one. I have fixed this in PCRE by forcing
    any group that contains a reference to itself to be an atomic group; that
    is, there cannot be any backtracking into it once it has completed. This is
    similar to recursive and subroutine calls.


Version 8.00 19-Oct-09
----------------------

1.  The table for translating pcre_compile() error codes into POSIX error codes
    was out-of-date, and there was no check on the pcre_compile() error code
    being within the table. This could lead to an OK return being given in
    error.

2.  Changed the call to open a subject file in pcregrep from fopen(pathname,
    "r") to fopen(pathname, "rb"), which fixed a problem with some of the tests
    in a Windows environment.

3.  The pcregrep --count option prints the count for each file even when it is
    zero, as does GNU grep. However, pcregrep was also printing all files when
    --files-with-matches was added. Now, when both options are given, it prints
    counts only for those files that have at least one match. (GNU grep just
    prints the file name in this circumstance, but including the count seems
    more useful - otherwise, why use --count?) Also ensured that the
    combination -clh just lists non-zero counts, with no names.

4.  The long form of the pcregrep -F option was incorrectly implemented as
    --fixed_strings instead of --fixed-strings. This is an incompatible change,
    but it seems right to fix it, and I didn't think it was worth preserving
    the old behaviour.

5.  The command line items --regex=pattern and --regexp=pattern were not
    recognized by pcregrep, which required --regex pattern or --regexp pattern
    (with a space rather than an '='). The man page documented the '=' forms,
    which are compatible with GNU grep; these now work.

6.  No libpcreposix.pc file was created for pkg-config; there was just
    libpcre.pc and libpcrecpp.pc. The omission has been rectified.

7.  Added #ifndef SUPPORT_UCP into the pcre_ucd.c module, to reduce its size
    when UCP support is not needed, by modifying the Python script that
    generates it from Unicode data files. This should not matter if the module
    is correctly used as a library, but I received one complaint about 50K of
    unwanted data. My guess is that the person linked everything into his
    program rather than using a library. Anyway, it does no harm.

8.  A pattern such as /\x{123}{2,2}+/8 was incorrectly compiled; the trigger
    was a minimum greater than 1 for a wide character in a possessive
    repetition. The same bug could also affect patterns like /(\x{ff}{0,2})*/8
    which had an unlimited repeat of a nested, fixed maximum repeat of a wide
    character. Chaos in the form of incorrect output or a compiling loop could
    result.

9.  The restrictions on what a pattern can contain when partial matching is
    requested for pcre_exec() have been removed. All patterns can now be
    partially matched by this function. In addition, if there are at least two
    slots in the offset vector, the offset of the earliest inspected character
    for the match and the offset of the end of the subject are set in them when
    PCRE_ERROR_PARTIAL is returned.

10. Partial matching has been split into two forms: PCRE_PARTIAL_SOFT, which is
    synonymous with PCRE_PARTIAL, for backwards compatibility, and
    PCRE_PARTIAL_HARD, which causes a partial match to supersede a full match,
    and may be more useful for multi-segment matching.

11. Partial matching with pcre_exec() is now more intuitive. A partial match
    used to be given if ever the end of the subject was reached; now it is
    given only if matching could not proceed because another character was
    needed. This makes a difference in some odd cases such as Z(*FAIL) with the
    string "Z", which now yields "no match" instead of "partial match". In the
    case of pcre_dfa_exec(), "no match" is given if every matching path for the
    final character ended with (*FAIL).

12. Restarting a match using pcre_dfa_exec() after a partial match did not work
    if the pattern had a "must contain" character that was already found in the
    earlier partial match, unless partial matching was again requested. For
    example, with the pattern /dog.(body)?/, the "must contain" character is
    "g". If the first part-match was for the string "dog", restarting with
    "sbody" failed. This bug has been fixed.

13. The string returned by pcre_dfa_exec() after a partial match has been
    changed so that it starts at the first inspected character rather than the
    first character of the match. This makes a difference only if the pattern
    starts with a lookbehind assertion or \b or \B (\K is not supported by
    pcre_dfa_exec()). It's an incompatible change, but it makes the two
    matching functions compatible, and I think it's the right thing to do.

14. Added a pcredemo man page, created automatically from the pcredemo.c file,
    so that the demonstration program is easily available in environments where
    PCRE has not been installed from source.

15. Arranged to add -DPCRE_STATIC to cflags in libpcre.pc, libpcreposix.pc,
    libpcrecpp.pc and pcre-config when PCRE is not compiled as a shared
    library.

16. Added REG_UNGREEDY to the pcreposix interface, at the request of a user.
    It maps to PCRE_UNGREEDY. It is not, of course, POSIX-compatible, but it
    is not the first non-POSIX option to be added. Clearly some people find
    these options useful.

17. If a caller to the POSIX matching function regexec() passes a non-zero
    value for nmatch with a NULL value for pmatch, the value of
    nmatch is forced to zero.

18. RunGrepTest did not have a test for the availability of the -u option of
    the diff command, as RunTest does. It now checks in the same way as
    RunTest, and also checks for the -b option.

19. If an odd number of negated classes containing just a single character
    interposed, within parentheses, between a forward reference to a named
    subpattern and the definition of the subpattern, compilation crashed with
    an internal error, complaining that it could not find the referenced
    subpattern. An example of a crashing pattern is /(?&A)(([^m])(?<A>))/.
    [The bug was that it was starting one character too far in when skipping
    over the character class, thus treating the ] as data rather than
    terminating the class. This meant it could skip too much.]

20. Added PCRE_NOTEMPTY_ATSTART in order to be able to correctly implement the
    /g option in pcretest when the pattern contains \K, which makes it possible
    to have an empty string match not at the start, even when the pattern is
    anchored. Updated pcretest and pcredemo to use this option.

21. If the maximum number of capturing subpatterns in a recursion was greater
    than the maximum at the outer level, the higher number was returned, but
    with unset values at the outer level. The correct (outer level) value is
    now given.

22. If (*ACCEPT) appeared inside capturing parentheses, previous releases of
    PCRE did not set those parentheses (unlike Perl). I have now found a way to
    make it do so. The string so far is captured, making this feature
    compatible with Perl.

23. The tests have been re-organized, adding tests 11 and 12, to make it
    possible to check the Perl 5.10 features against Perl 5.10.

24. Perl 5.10 allows subroutine calls in lookbehinds, as long as the subroutine
    pattern matches a fixed length string. PCRE did not allow this; now it
    does. Neither allows recursion.

25. I finally figured out how to implement a request to provide the minimum
    length of subject string that was needed in order to match a given pattern.
    (It was back references and recursion that I had previously got hung up
    on.) This code has now been added to pcre_study(); it finds a lower bound
    to the length of subject needed. It is not necessarily the greatest lower
    bound, but using it to avoid searching strings that are too short does give
    some useful speed-ups. The value is available to calling programs via
    pcre_fullinfo().

26. While implementing 25, I discovered to my embarrassment that pcretest had
    not been passing the result of pcre_study() to pcre_dfa_exec(), so the
    study optimizations had never been tested with that matching function.
    Oops. What is worse, even when it was passed study data, there was a bug in
    pcre_dfa_exec() that meant it never actually used it. Double oops. There
    were also very few tests of studied patterns with pcre_dfa_exec().

27. If (?| is used to create subpatterns with duplicate numbers, they are now
    allowed to have the same name, even if PCRE_DUPNAMES is not set. However,
    on the other side of the coin, they are no longer allowed to have different
    names, because these cannot be distinguished in PCRE, and this has caused
    confusion. (This is a difference from Perl.)

28. When duplicate subpattern names are present (necessarily with different
    numbers, as required by 27 above), and a test is made by name in a
    conditional pattern, either for a subpattern having been matched, or for
    recursion in such a pattern, all the associated numbered subpatterns are
    tested, and the overall condition is true if the condition is true for any
    one of them. This is the way Perl works, and is also more like the way
    testing by number works.


Version 7.9 11-Apr-09
---------------------

1.  When building with support for bzlib/zlib (pcregrep) and/or readline
    (pcretest), all targets were linked against these libraries. This included
    libpcre, libpcreposix, and libpcrecpp, even though they do not use these
    libraries. This caused unwanted dependencies to be created. This problem
    has been fixed, and now only pcregrep is linked with bzlib/zlib and only
    pcretest is linked with readline.

2.  The "typedef int BOOL" in pcre_internal.h that was included inside the
    "#ifndef FALSE" condition by an earlier change (probably 7.8/18) has been
    moved outside it again, because FALSE and TRUE are already defined in AIX,
    but BOOL is not.

3.  The pcre_config() function was treating the PCRE_MATCH_LIMIT and
    PCRE_MATCH_LIMIT_RECURSION values as ints, when they should be long ints.

4.  The pcregrep documentation said spaces were inserted as well as colons (or
    hyphens) following file names and line numbers when outputting matching
    lines. This is not true; no spaces are inserted. I have also clarified the
    wording for the --colour (or --color) option.

5.  In pcregrep, when --colour was used with -o, the list of matching strings
    was not coloured; this is different to GNU grep, so I have changed it to be
    the same.

6.  When --colo(u)r was used in pcregrep, only the first matching substring in
    each matching line was coloured. Now it goes on to look for further matches
    of any of the test patterns, which is the same behaviour as GNU grep.

7.  A pattern that could match an empty string could cause pcregrep to loop; it
    doesn't make sense to accept an empty string match in pcregrep, so I have
    locked it out (using PCRE's PCRE_NOTEMPTY option). By experiment, this
    seems to be how GNU grep behaves.

8.  The pattern (?(?=.*b)b|^) was incorrectly compiled as "match must be at
    start or after a newline", because the conditional assertion was not being
    correctly handled. The rule now is that both the assertion and what follows
    in the first alternative must satisfy the test.

9.  If auto-callout was enabled in a pattern with a conditional group whose
    condition was an assertion, PCRE could crash during matching, both with
    pcre_exec() and pcre_dfa_exec().

10. The PCRE_DOLLAR_ENDONLY option was not working when pcre_dfa_exec() was
    used for matching.

11. Unicode property support in character classes was not working for
    characters (bytes) greater than 127 when not in UTF-8 mode.

12. Added the -M command line option to pcretest.

14. Added the non-standard REG_NOTEMPTY option to the POSIX interface.

15. Added the PCRE_NO_START_OPTIMIZE match-time option.

16. Added comments and documentation about mis-use of no_arg in the C++
    wrapper.

17. Implemented support for UTF-8 encoding in EBCDIC environments, a patch
    from Martin Jerabek that uses macro names for all relevant character and
    string constants.

18. Added to pcre_internal.h two configuration checks: (a) If both EBCDIC and
    SUPPORT_UTF8 are set, give an error; (b) If SUPPORT_UCP is set without
    SUPPORT_UTF8, define SUPPORT_UTF8. The "configure" script handles both of
    these, but not everybody uses configure.

19. A conditional group that had only one branch was not being correctly
    recognized as an item that could match an empty string. This meant that an
    enclosing group might also not be so recognized, causing infinite looping
    (and probably a segfault) for patterns such as ^"((?(?=[a])[^"])|b)*"$
    with the subject "ab", where knowledge that the repeated group can match
    nothing is needed in order to break the loop.

20. If a pattern that was compiled with callouts was matched using pcre_dfa_
    exec(), but without supplying a callout function, matching went wrong.

21. If PCRE_ERROR_MATCHLIMIT occurred during a recursion, there was a memory
    leak if the size of the offset vector was greater than 30. When the vector
    is smaller, the saved offsets during recursion go onto a local stack
    vector, but for larger vectors malloc() is used. It was failing to free
    when the recursion yielded PCRE_ERROR_MATCHLIMIT (or any other "abnormal"
    error, in fact).

22. There was a missing #ifdef SUPPORT_UTF8 round one of the variables in the
    heapframe that is used only when UTF-8 support is enabled. This caused no
    problem, but was untidy.

23. Steven Van Ingelgem's patch to CMakeLists.txt to change the name
    CMAKE_BINARY_DIR to PROJECT_BINARY_DIR so that it works when PCRE is
    included within another project.

24. Steven Van Ingelgem's patches to add more options to the CMake support,
    slightly modified by me:

      (a) PCRE_BUILD_TESTS can be set OFF not to build the tests, including
          not building pcregrep.

      (b) PCRE_BUILD_PCREGREP can be set OFF not to build pcregrep, but only
          if PCRE_BUILD_TESTS is also set OFF, because the tests use pcregrep.

25. Forward references, both numeric and by name, in patterns that made use of
    duplicate group numbers, could behave incorrectly or give incorrect errors,
    because when scanning forward to find the reference group, PCRE was not
    taking into account the duplicate group numbers. A pattern such as
    ^X(?3)(a)(?|(b)|(q))(Y) is an example.

26. Changed a few more instances of "const unsigned char *" to USPTR, making
    the feature of a custom pointer more persuasive (as requested by a user).

27. Wrapped the definitions of fileno and isatty for Windows, which appear in
    pcretest.c, inside #ifndefs, because it seems they are sometimes already
    pre-defined.

28. Added support for (*UTF8) at the start of a pattern.

29. Arrange for flags added by the "release type" setting in CMake to be shown
    in the configuration summary.


Version 7.8 05-Sep-08
---------------------

1.  Replaced UCP searching code with optimized version as implemented for Ad
    Muncher (http://www.admuncher.com/) by Peter Kankowski. This uses a two-
    stage table and inline lookup instead of a function, giving speed ups of 2
    to 5 times on some simple patterns that I tested. Permission was given to
    distribute the MultiStage2.py script that generates the tables (it's not in
    the tarball, but is in the Subversion repository).

2.  Updated the Unicode datatables to Unicode 5.1.0. This adds yet more
    scripts.

3.  Change 12 for 7.7 introduced a bug in pcre_study() when a pattern contained
    a group with a zero qualifier. The result of the study could be incorrect,
    or the function might crash, depending on the pattern.

4.  Caseless matching was not working for non-ASCII characters in back
    references. For example, /(\x{de})\1/8i was not matching \x{de}\x{fe}.
    It now works when Unicode Property Support is available.

5.  In pcretest, an escape such as \x{de} in the data was always generating
    a UTF-8 string, even in non-UTF-8 mode. Now it generates a single byte in
    non-UTF-8 mode. If the value is greater than 255, it gives a warning about
    truncation.

6.  Minor bugfix in pcrecpp.cc (change "" == ... to NULL == ...).

7.  Added two (int) casts to pcregrep when printing the difference of two
    pointers, in case they are 64-bit values.

8.  Added comments about Mac OS X stack usage to the pcrestack man page and to
    test 2 if it fails.

9.  Added PCRE_CALL_CONVENTION just before the names of all exported functions,
    and a #define of that name to empty if it is not externally set. This is to
    allow users of MSVC to set it if necessary.

10. The PCRE_EXP_DEFN macro which precedes exported functions was missing from
    the convenience functions in the pcre_get.c source file.

11. An option change at the start of a pattern that had top-level alternatives
    could cause overwriting and/or a crash. This command provoked a crash in
    some environments:

      printf "/(?i)[\xc3\xa9\xc3\xbd]|[\xc3\xa9\xc3\xbdA]/8\n" | pcretest

    This potential security problem was recorded as CVE-2008-2371.

12. For a pattern where the match had to start at the beginning or immediately
    after a newline (e.g. /.*anything/ without the DOTALL flag), pcre_exec() and
    pcre_dfa_exec() could read past the end of the passed subject if there was
    no match. To help with detecting such bugs (e.g. with valgrind), I modified
    pcretest so that it places the subject at the end of its malloc-ed buffer.

13. The change to pcretest in 12 above threw up a couple more cases when pcre_
    exec() might read past the end of the data buffer in UTF-8 mode.

14. A similar bug to 7.3/2 existed when the PCRE_FIRSTLINE option was set and
    the data contained the byte 0x85 as part of a UTF-8 character within its
    first line. This applied both to normal and DFA matching.

15. Lazy qualifiers were not working in some cases in UTF-8 mode. For example,
    /^[^d]*?$/8 failed to match "abc".

16. Added a missing copyright notice to pcrecpp_internal.h.

17. Make it more clear in the documentation that values returned from
    pcre_exec() in ovector are byte offsets, not character counts.

18. Tidied a few places to stop certain compilers from issuing warnings.

19. Updated the Virtual Pascal + BCC files to compile the latest v7.7, as
    supplied by Stefan Weber. I made a further small update for 7.8 because
    there is a change of source arrangements: the pcre_searchfuncs.c module is
    replaced by pcre_ucd.c.


Version 7.7 07-May-08
---------------------

1.  Applied Craig's patch to sort out a long long problem: "If we can't convert
    a string to a long long, pretend we don't even have a long long." This is
    done by checking for the strtoq, strtoll, and _strtoi64 functions.

2.  Applied Craig's patch to pcrecpp.cc to restore ABI compatibility with
    pre-7.6 versions, which defined a global no_arg variable instead of putting
    it in the RE class. (See also #8 below.)

3.  Remove a line of dead code, identified by coverity and reported by Nuno
    Lopes.

4.  Fixed two related pcregrep bugs involving -r with --include or --exclude:

    (1) The include/exclude patterns were being applied to the whole pathnames
        of files, instead of just to the final components.

    (2) If there was more than one level of directory, the subdirectories were
        skipped unless they satisfied the include/exclude conditions. This is
        inconsistent with GNU grep (and could even be seen as contrary to the
        pcregrep specification - which I improved to make it absolutely clear).
        The action now is always to scan all levels of directory, and just
        apply the include/exclude patterns to regular files.

5.  Added the --include_dir and --exclude_dir patterns to pcregrep, and used
    --exclude_dir in the tests to avoid scanning .svn directories.

6.  Applied Craig's patch to the QuoteMeta function so that it escapes the
    NUL character as backslash + 0 rather than backslash + NUL, because PCRE
    doesn't support NULs in patterns.

7.  Added some missing "const"s to declarations of static tables in
    pcre_compile.c and pcre_dfa_exec.c.

8.  Applied Craig's patch to pcrecpp.cc to fix a problem in OS X that was
    caused by fix #2  above. (Subsequently also a second patch to fix the
    first patch. And a third patch - this was a messy problem.)

9.  Applied Craig's patch to remove the use of push_back().

10. Applied Alan Lehotsky's patch to add REG_STARTEND support to the POSIX
    matching function regexec().

11. Added support for the Oniguruma syntax \g<name>, \g<n>, \g'name', \g'n',
    which, however, unlike Perl's \g{...}, are subroutine calls, not back
    references. PCRE supports relative numbers with this syntax (I don't think
    Oniguruma does).

12. Previously, a group with a zero repeat such as (...){0} was completely
    omitted from the compiled regex. However, this means that if the group
    was called as a subroutine from elsewhere in the pattern, things went wrong
    (an internal error was given). Such groups are now left in the compiled
    pattern, with a new opcode that causes them to be skipped at execution
    time.

13. Added the PCRE_JAVASCRIPT_COMPAT option. This makes the following changes
    to the way PCRE behaves:

    (a) A lone ] character is dis-allowed (Perl treats it as data).

    (b) A back reference to an unmatched subpattern matches an empty string
        (Perl fails the current match path).

    (c) A data ] in a character class must be notated as \] because if the
        first data character in a class is ], it defines an empty class. (In
        Perl it is not possible to have an empty class.) The empty class []
        never matches; it forces failure and is equivalent to (*FAIL) or (?!).
        The negative empty class [^] matches any one character, independently
        of the DOTALL setting.

14. A pattern such as /(?2)[]a()b](abc)/ which had a forward reference to a
    non-existent subpattern following a character class starting with ']' and
    containing () gave an internal compiling error instead of "reference to
    non-existent subpattern". Fortunately, when the pattern did exist, the
    compiled code was correct. (When scanning forwards to check for the
    existence of the subpattern, it was treating the data ']' as terminating
    the class, so got the count wrong. When actually compiling, the reference
    was subsequently set up correctly.)

15. The "always fail" assertion (?!) is optimized to (*FAIL) by pcre_compile;
    it was being rejected as not supported by pcre_dfa_exec(), even though
    other assertions are supported. I have made pcre_dfa_exec() support
    (*FAIL).

16. The implementation of 13c above involved the invention of a new opcode,
    OP_ALLANY, which is like OP_ANY but doesn't check the /s flag. Since /s
    cannot be changed at match time, I realized I could make a small
    improvement to matching performance by compiling OP_ALLANY instead of
    OP_ANY for "." when DOTALL was set, and then removing the runtime tests
    on the OP_ANY path.

17. Compiling pcretest on Windows with readline support failed without the
    following two fixes: (1) Make the unistd.h include conditional on
    HAVE_UNISTD_H; (2) #define isatty and fileno as _isatty and _fileno.

18. Changed CMakeLists.txt and cmake/FindReadline.cmake to arrange for the
    ncurses library to be included for pcretest when ReadLine support is
    requested, but also to allow for it to be overridden. This patch came from
    Daniel Bergström.

19. There was a typo in the file ucpinternal.h where f0_rangeflag was defined
    as 0x00f00000 instead of 0x00800000. Luckily, this would not have caused
    any errors with the current Unicode tables. Thanks to Peter Kankowski for
    spotting this.


Version 7.6 28-Jan-08
---------------------

1.  A character class containing a very large number of characters with
    codepoints greater than 255 (in UTF-8 mode, of course) caused a buffer
    overflow.

2.  Patch to cut out the "long long" test in pcrecpp_unittest when
    HAVE_LONG_LONG is not defined.

3.  Applied Christian Ehrlicher's patch to update the CMake build files to
    bring them up to date and include new features. This patch includes:

    - Fixed PH's badly added libz and libbz2 support.
    - Fixed a problem with static linking.
    - Added pcredemo. [But later removed - see 7 below.]
    - Fixed dftables problem and added an option.
    - Added a number of HAVE_XXX tests, including HAVE_WINDOWS_H and
        HAVE_LONG_LONG.
    - Added readline support for pcretest.
    - Added a listing of the option settings after cmake has run.

4.  A user submitted a patch to Makefile that makes it easy to create
    "pcre.dll" under mingw when using Configure/Make. I added stuff to
    Makefile.am that causes it to include this special target, without
    affecting anything else. Note that the same mingw target plus all
    the other distribution libraries and programs are now supported
    when configuring with CMake (see 6 below) instead of with
    Configure/Make.

5.  Applied Craig's patch that moves no_arg into the RE class in the C++ code.
    This is an attempt to solve the reported problem "pcrecpp::no_arg is not
    exported in the Windows port". It has not yet been confirmed that the patch
    solves the problem, but it does no harm.

6.  Applied Sheri's patch to CMakeLists.txt to add NON_STANDARD_LIB_PREFIX and
    NON_STANDARD_LIB_SUFFIX for dll names built with mingw when configured
    with CMake, and also correct the comment about stack recursion.

7.  Remove the automatic building of pcredemo from the ./configure system and
    from CMakeLists.txt. The whole idea of pcredemo.c is that it is an example
    of a program that users should build themselves after PCRE is installed, so
    building it automatically is not really right. What is more, it gave
    trouble in some build environments.

8.  Further tidies to CMakeLists.txt from Sheri and Christian.


Version 7.5 10-Jan-08
---------------------

1.  Applied a patch from Craig: "This patch makes it possible to 'ignore'
    values in parens when parsing an RE using the C++ wrapper."

2.  Negative specials like \S did not work in character classes in UTF-8 mode.
    Characters greater than 255 were excluded from the class instead of being
    included.

3.  The same bug as (2) above applied to negated POSIX classes such as
    [:^space:].

4.  PCRECPP_STATIC was referenced in pcrecpp_internal.h, but nowhere was it
    defined or documented. It seems to have been a typo for PCRE_STATIC, so
    I have changed it.

5.  The construct (?&) was not diagnosed as a syntax error (it referenced the
    first named subpattern) and a construct such as (?&a) would reference the
    first named subpattern whose name started with "a" (in other words, the
    length check was missing). Both these problems are fixed. "Subpattern name
    expected" is now given for (?&) (a zero-length name), and this patch also
    makes it give the same error for \k'' (previously it complained that that
    was a reference to a non-existent subpattern).

6.  The erroneous patterns (?+-a) and (?-+a) give different error messages;
    this is right because (?- can be followed by option settings as well as by
    digits. I have, however, made the messages clearer.

7.  Patterns such as (?(1)a|b) (a pattern that contains fewer subpatterns
    than the number used in the conditional) now cause a compile-time error.
    This is actually not compatible with Perl, which accepts such patterns, but
    treats the conditional as always being FALSE (as PCRE used to), but it
    seems to me that giving a diagnostic is better.

8.  Change "alphameric" to the more common word "alphanumeric" in comments
    and messages.

9.  Fix two occurrences of "backslash" in comments that should have been
    "backspace".

10. Remove two redundant lines of code that can never be obeyed (their function
    was moved elsewhere).

11. The program that makes PCRE's Unicode character property table had a bug
    which caused it to generate incorrect table entries for sequences of
    characters that have the same character type, but are in different scripts.
    It amalgamated them into a single range, with the script of the first of
    them. In other words, some characters were in the wrong script. There were
    thirteen such cases, affecting characters in the following ranges:

      U+002b0 - U+002c1
      U+0060c - U+0060d
      U+0061e - U+00612
      U+0064b - U+0065e
      U+0074d - U+0076d
      U+01800 - U+01805
      U+01d00 - U+01d77
      U+01d9b - U+01dbf
      U+0200b - U+0200f
      U+030fc - U+030fe
      U+03260 - U+0327f
      U+0fb46 - U+0fbb1
      U+10450 - U+1049d

12. The -o option (show only the matching part of a line) for pcregrep was not
    compatible with GNU grep in that, if there was more than one match in a
    line, it showed only the first of them. It now behaves in the same way as
    GNU grep.

13. If the -o and -v options were combined for pcregrep, it printed a blank
    line for every non-matching line. GNU grep prints nothing, and pcregrep now
    does the same. The return code can be used to tell if there were any
    non-matching lines.

14. Added --file-offsets and --line-offsets to pcregrep.

15. The pattern (?=something)(?R) was not being diagnosed as a potentially
    infinitely looping recursion. The bug was that positive lookaheads were not
    being skipped when checking for a possible empty match (negative lookaheads
    and both kinds of lookbehind were skipped).

16. Fixed two typos in the Windows-only code in pcregrep.c, and moved the
    inclusion of <windows.h> to before rather than after the definition of
    INVALID_FILE_ATTRIBUTES (patch from David Byron).

17. Specifying a possessive quantifier with a specific limit for a Unicode
    character property caused pcre_compile() to compile bad code, which led at
    runtime to PCRE_ERROR_INTERNAL (-14). Examples of patterns that caused this
    are: /\p{Zl}{2,3}+/8 and /\p{Cc}{2}+/8. It was the possessive "+" that
    caused the error; without that there was no problem.

18. Added --enable-pcregrep-libz and --enable-pcregrep-libbz2.

19. Added --enable-pcretest-libreadline.

20. In pcrecpp.cc, the variable 'count' was incremented twice in
    RE::GlobalReplace(). As a result, the number of replacements returned was
    double what it should be. I removed one of the increments, but Craig sent a
    later patch that removed the other one (the right fix) and added unit tests
    that check the return values (which was not done before).

21. Several CMake things:

    (1) Arranged that, when cmake is used on Unix, the libraries end up with
        the names libpcre and libpcreposix, not just pcre and pcreposix.

    (2) The above change means that pcretest and pcregrep are now correctly
        linked with the newly-built libraries, not previously installed ones.

    (3) Added PCRE_SUPPORT_LIBREADLINE, PCRE_SUPPORT_LIBZ, PCRE_SUPPORT_LIBBZ2.

22. In UTF-8 mode, with newline set to "any", a pattern such as .*a.*=.b.*
    crashed when matching a string such as a\x{2029}b (note that \x{2029} is a
    UTF-8 newline character). The key issue is that the pattern starts .*;
    this means that the match must be either at the beginning, or after a
    newline. The bug was in the code for advancing after a failed match and
    checking that the new position followed a newline. It was not taking
    account of UTF-8 characters correctly.

23. PCRE was behaving differently from Perl in the way it recognized POSIX
    character classes. PCRE was not treating the sequence [:...:] as a
    character class unless the ... were all letters. Perl, however, seems to
    allow any characters between [: and :], though of course it rejects as
    unknown any "names" that contain non-letters, because all the known class
    names consist only of letters. Thus, Perl gives an error for [[:1234:]],
    for example, whereas PCRE did not - it did not recognize a POSIX character
    class. This seemed a bit dangerous, so the code has been changed to be
    closer to Perl. The behaviour is not identical to Perl, because PCRE will
    diagnose an unknown class for, for example, [[:l\ower:]] where Perl will
    treat it as [[:lower:]]. However, PCRE does now give "unknown" errors where
    Perl does, and where it didn't before.

24. Rewrite so as to remove the single use of %n from pcregrep because in some
    Windows environments %n is disabled by default.


Version 7.4 21-Sep-07
---------------------

1.  Change 7.3/28 was implemented for classes by looking at the bitmap. This
    means that a class such as [\s] counted as "explicit reference to CR or
    LF". That isn't really right - the whole point of the change was to try to
    help when there was an actual mention of one of the two characters. So now
    the change happens only if \r or \n (or a literal CR or LF) character is
    encountered.

2.  The 32-bit options word was also used for 6 internal flags, but the numbers
    of both had grown to the point where there were only 3 bits left.
    Fortunately, there was spare space in the data structure, and so I have
    moved the internal flags into a new 16-bit field to free up more option
    bits.

3.  The appearance of (?J) at the start of a pattern set the DUPNAMES option,
    but did not set the internal JCHANGED flag - either of these is enough to
    control the way the "get" function works - but the PCRE_INFO_JCHANGED
    facility is supposed to tell if (?J) was ever used, so now (?J) at the
    start sets both bits.

4.  Added options (at build time, compile time, exec time) to change \R from
    matching any Unicode line ending sequence to just matching CR, LF, or CRLF.

5.  doc/pcresyntax.html was missing from the distribution.

6.  Put back the definition of PCRE_ERROR_NULLWSLIMIT, for backward
    compatibility, even though it is no longer used.

7.  Added macro for snprintf to pcrecpp_unittest.cc and also for strtoll and
    strtoull to pcrecpp.cc to select the available functions in WIN32 when the
    windows.h file is present (where different names are used). [This was
    reversed later after testing - see 16 below.]

8.  Changed all #include <config.h> to #include "config.h". There were also
    some further <pcre.h> cases that I changed to "pcre.h".

9.  When pcregrep was used with the --colour option, it missed the line ending
    sequence off the lines that it output.

10. It was pointed out to me that arrays of string pointers cause lots of
    relocations when a shared library is dynamically loaded. A technique of
    using a single long string with a table of offsets can drastically reduce
    these. I have refactored PCRE in four places to do this. The result is
    dramatic:

      Originally:                          290
      After changing UCP table:            187
      After changing error message table:   43
      After changing table of "verbs"       36
      After changing table of Posix names   22

    Thanks to the folks working on Gregex for glib for this insight.

11. --disable-stack-for-recursion caused compiling to fail unless
    --enable-unicode-properties was also set.

12. Updated the tests so that they work when \R is defaulted to ANYCRLF.

13. Added checks for ANY and ANYCRLF to pcrecpp.cc where it previously
    checked only for CRLF.

14. Added casts to pcretest.c to avoid compiler warnings.

15. Added Craig's patch to various pcrecpp modules to avoid compiler warnings.

16. Added Craig's patch to remove the WINDOWS_H tests, that were not working,
    and instead check for _strtoi64 explicitly, and avoid the use of snprintf()
    entirely. This removes changes made in 7 above.

17. The CMake files have been updated, and there is now more information about
    building with CMake in the NON-UNIX-USE document.


Version 7.3 28-Aug-07
---------------------

 1. In the rejigging of the build system that eventually resulted in 7.1, the
    line "#include <pcre.h>" was included in pcre_internal.h. The use of angle
    brackets there is not right, since it causes compilers to look for an
    installed pcre.h, not the version that is in the source that is being
    compiled (which of course may be different). I have changed it back to:

      #include "pcre.h"

    I have a vague recollection that the change was concerned with compiling in
    different directories, but in the new build system, that is taken care of
    by the VPATH setting in the Makefile.

 2. The pattern .*$ when run in not-DOTALL UTF-8 mode with newline=any failed
    when the subject happened to end in the byte 0x85 (e.g. if the last
    character was \x{1ec5}). *Character* 0x85 is one of the "any" newline
    characters but of course it shouldn't be taken as a newline when it is part
    of another character. The bug was that, for an unlimited repeat of . in
    not-DOTALL UTF-8 mode, PCRE was advancing by bytes rather than by
    characters when looking for a newline.

 3. A small performance improvement in the DOTALL UTF-8 mode .* case.

 4. Debugging: adjusted the names of opcodes for different kinds of parentheses
    in debug output.

 5. Arrange to use "%I64d" instead of "%lld" and "%I64u" instead of "%llu" for
    long printing in the pcrecpp unittest when running under MinGW.

 6. ESC_K was left out of the EBCDIC table.

 7. Change 7.0/38 introduced a new limit on the number of nested non-capturing
    parentheses; I made it 1000, which seemed large enough. Unfortunately, the
    limit also applies to "virtual nesting" when a pattern is recursive, and in
    this case 1000 isn't so big. I have been able to remove this limit at the
    expense of backing off one optimization in certain circumstances. Normally,
    when pcre_exec() would call its internal match() function recursively and
    immediately return the result unconditionally, it uses a "tail recursion"
    feature to save stack. However, when a subpattern that can match an empty
    string has an unlimited repetition quantifier, it no longer makes this
    optimization. That gives it a stack frame in which to save the data for
    checking that an empty string has been matched. Previously this was taken
    from the 1000-entry workspace that had been reserved. So now there is no
    explicit limit, but more stack is used.

 8. Applied Daniel's patches to solve problems with the import/export magic
    syntax that is required for Windows, and which was going wrong for the
    pcreposix and pcrecpp parts of the library. These were overlooked when this
    problem was solved for the main library.

 9. There were some crude static tests to avoid integer overflow when computing
    the size of patterns that contain repeated groups with explicit upper
    limits. As the maximum quantifier is 65535, the maximum group length was
    set at 30,000 so that the product of these two numbers did not overflow a
    32-bit integer. However, it turns out that people want to use groups that
    are longer than 30,000 bytes (though not repeat them that many times).
    Change 7.0/17 (the refactoring of the way the pattern size is computed) has
    made it possible to implement the integer overflow checks in a much more
    dynamic way, which I have now done. The artificial limitation on group
    length has been removed - we now have only the limit on the total length of
    the compiled pattern, which depends on the LINK_SIZE setting.

10. Fixed a bug in the documentation for get/copy named substring when
    duplicate names are permitted. If none of the named substrings are set, the
    functions return PCRE_ERROR_NOSUBSTRING (7); the doc said they returned an
    empty string.

11. Because Perl interprets \Q...\E at a high level, and ignores orphan \E
    instances, patterns such as [\Q\E] or [\E] or even [^\E] cause an error,
    because the ] is interpreted as the first data character and the
    terminating ] is not found. PCRE has been made compatible with Perl in this
    regard. Previously, it interpreted [\Q\E] as an empty class, and [\E] could
    cause memory overwriting.

10. Like Perl, PCRE automatically breaks an unlimited repeat after an empty
    string has been matched (to stop an infinite loop). It was not recognizing
    a conditional subpattern that could match an empty string if that
    subpattern was within another subpattern. For example, it looped when
    trying to match  (((?(1)X|))*)  but it was OK with  ((?(1)X|)*)  where the
    condition was not nested. This bug has been fixed.

12. A pattern like \X?\d or \P{L}?\d in non-UTF-8 mode could cause a backtrack
    past the start of the subject in the presence of bytes with the top bit
    set, for example "\x8aBCD".

13. Added Perl 5.10 experimental backtracking controls (*FAIL), (*F), (*PRUNE),
    (*SKIP), (*THEN), (*COMMIT), and (*ACCEPT).

14. Optimized (?!) to (*FAIL).

15. Updated the test for a valid UTF-8 string to conform to the later RFC 3629.
    This restricts code points to be within the range 0 to 0x10FFFF, excluding
    the surrogate code points 0xD800 to 0xDFFF. Previously, PCRE allowed the
    full range 0 to 0x7FFFFFFF, as defined by RFC 2279. Internally, it still
    does: it's just the validity check that is more restrictive.

16. Inserted checks for integer overflows during escape sequence (backslash)
    processing, and also fixed erroneous offset values for syntax errors during
    backslash processing.

17. Fixed another case of looking too far back in non-UTF-8 mode (cf 12 above)
    for patterns like [\PPP\x8a]{1,}\x80 with the subject "A\x80".

18. An unterminated class in a pattern like (?1)\c[ with a "forward reference"
    caused an overrun.

19. A pattern like (?:[\PPa*]*){8,} which had an "extended class" (one with
    something other than just ASCII characters) inside a group that had an
    unlimited repeat caused a loop at compile time (while checking to see
    whether the group could match an empty string).

20. Debugging a pattern containing \p or \P could cause a crash. For example,
    [\P{Any}] did so. (Error in the code for printing property names.)

21. An orphan \E inside a character class could cause a crash.

22. A repeated capturing bracket such as (A)? could cause a wild memory
    reference during compilation.

23. There are several functions in pcre_compile() that scan along a compiled
    expression for various reasons (e.g. to see if it's fixed length for look
    behind). There were bugs in these functions when a repeated \p or \P was
    present in the pattern. These operators have additional parameters compared
    with \d, etc, and these were not being taken into account when moving along
    the compiled data. Specifically:

    (a) An item such as \p{Yi}{3} in a lookbehind was not treated as fixed
        length.

    (b) An item such as \pL+ within a repeated group could cause crashes or
        loops.

    (c) A pattern such as \p{Yi}+(\P{Yi}+)(?1) could give an incorrect
        "reference to non-existent subpattern" error.

    (d) A pattern like (\P{Yi}{2}\277)? could loop at compile time.

24. A repeated \S or \W in UTF-8 mode could give wrong answers when multibyte
    characters were involved (for example /\S{2}/8g with "A\x{a3}BC").

25. Using pcregrep in multiline, inverted mode (-Mv) caused it to loop.

26. Patterns such as [\P{Yi}A] which include \p or \P and just one other
    character were causing crashes (broken optimization).

27. Patterns such as (\P{Yi}*\277)* (group with possible zero repeat containing
    \p or \P) caused a compile-time loop.

28. More problems have arisen in unanchored patterns when CRLF is a valid line
    break. For example, the unstudied pattern [\r\n]A does not match the string
    "\r\nA" because change 7.0/46 below moves the current point on by two
    characters after failing to match at the start. However, the pattern \nA
    *does* match, because it doesn't start till \n, and if [\r\n]A is studied,
    the same is true. There doesn't seem any very clean way out of this, but
    what I have chosen to do makes the common cases work: PCRE now takes note
    of whether there can be an explicit match for \r or \n anywhere in the
    pattern, and if so, 7.0/46 no longer applies. As part of this change,
    there's a new PCRE_INFO_HASCRORLF option for finding out whether a compiled
    pattern has explicit CR or LF references.

29. Added (*CR) etc for changing newline setting at start of pattern.


Version 7.2 19-Jun-07
---------------------

 1. If the fr_FR locale cannot be found for test 3, try the "french" locale,
    which is apparently normally available under Windows.

 2. Re-jig the pcregrep tests with different newline settings in an attempt
    to make them independent of the local environment's newline setting.

 3. Add code to configure.ac to remove -g from the CFLAGS default settings.

 4. Some of the "internals" tests were previously cut out when the link size
    was not 2, because the output contained actual offsets. The recent new
    "Z" feature of pcretest means that these can be cut out, making the tests
    usable with all link sizes.

 5. Implemented Stan Switzer's goto replacement for longjmp() when not using
    stack recursion. This gives a massive performance boost under BSD, but just
    a small improvement under Linux. However, it saves one field in the frame
    in all cases.

 6. Added more features from the forthcoming Perl 5.10:

    (a) (?-n) (where n is a string of digits) is a relative subroutine or
        recursion call. It refers to the nth most recently opened parentheses.

    (b) (?+n) is also a relative subroutine call; it refers to the nth next
        to be opened parentheses.

    (c) Conditions that refer to capturing parentheses can be specified
        relatively, for example, (?(-2)... or (?(+3)...

    (d) \K resets the start of the current match so that everything before
        is not part of it.

    (e) \k{name} is synonymous with \k<name> and \k'name' (.NET compatible).

    (f) \g{name} is another synonym - part of Perl 5.10's unification of
        reference syntax.

    (g) (?| introduces a group in which the numbering of parentheses in each
        alternative starts with the same number.

    (h) \h, \H, \v, and \V match horizontal and vertical whitespace.

 7. Added two new calls to pcre_fullinfo(): PCRE_INFO_OKPARTIAL and
    PCRE_INFO_JCHANGED.

 8. A pattern such as  (.*(.)?)*  caused pcre_exec() to fail by either not
    terminating or by crashing. Diagnosed by Viktor Griph; it was in the code
    for detecting groups that can match an empty string.

 9. A pattern with a very large number of alternatives (more than several
    hundred) was running out of internal workspace during the pre-compile
    phase, where pcre_compile() figures out how much memory will be needed. A
    bit of new cunning has reduced the workspace needed for groups with
    alternatives. The 1000-alternative test pattern now uses 12 bytes of
    workspace instead of running out of the 4096 that are available.

10. Inserted some missing (unsigned int) casts to get rid of compiler warnings.

11. Applied patch from Google to remove an optimization that didn't quite work.
    The report of the bug said:

      pcrecpp::RE("a*").FullMatch("aaa") matches, while
      pcrecpp::RE("a*?").FullMatch("aaa") does not, and
      pcrecpp::RE("a*?\\z").FullMatch("aaa") does again.

12. If \p or \P was used in non-UTF-8 mode on a character greater than 127
    it matched the wrong number of bytes.


Version 7.1 24-Apr-07
---------------------

 1. Applied Bob Rossi and Daniel G's patches to convert the build system to one
    that is more "standard", making use of automake and other Autotools. There
    is some re-arrangement of the files and adjustment of comments consequent
    on this.

 2. Part of the patch fixed a problem with the pcregrep tests. The test of -r
    for recursive directory scanning broke on some systems because the files
    are not scanned in any specific order and on different systems the order
    was different. A call to "sort" has been inserted into RunGrepTest for the
    appropriate test as a short-term fix. In the longer term there may be an
    alternative.

 3. I had an email from Eric Raymond about problems translating some of PCRE's
    man pages to HTML (despite the fact that I distribute HTML pages, some
    people do their own conversions for various reasons). The problems
    concerned the use of low-level troff macros .br and .in. I have therefore
    removed all such uses from the man pages (some were redundant, some could
    be replaced by .nf/.fi pairs). The 132html script that I use to generate
    HTML has been updated to handle .nf/.fi and to complain if it encounters
    .br or .in.

 4. Updated comments in configure.ac that get placed in config.h.in and also
    arranged for config.h to be included in the distribution, with the name
    config.h.generic, for the benefit of those who have to compile without
    Autotools (compare pcre.h, which is now distributed as pcre.h.generic).

 5. Updated the support (such as it is) for Virtual Pascal, thanks to Stefan
    Weber: (1) pcre_internal.h was missing some function renames; (2) updated
    makevp.bat for the current PCRE, using the additional files
    makevp_c.txt, makevp_l.txt, and pcregexp.pas.

 6. A Windows user reported a minor discrepancy with test 2, which turned out
    to be caused by a trailing space on an input line that had got lost in his
    copy. The trailing space was an accident, so I've just removed it.

 7. Add -Wl,-R... flags in pcre-config.in for *BSD* systems, as I'm told
    that is needed.

 8. Mark ucp_table (in ucptable.h) and ucp_gentype (in pcre_ucp_searchfuncs.c)
    as "const" (a) because they are and (b) because it helps the PHP
    maintainers who have recently made a script to detect big data structures
    in the php code that should be moved to the .rodata section. I remembered
    to update Builducptable as well, so it won't revert if ucptable.h is ever
    re-created.

 9. Added some extra #ifdef SUPPORT_UTF8 conditionals into pcretest.c,
    pcre_printint.src, pcre_compile.c, pcre_study.c, and pcre_tables.c, in
    order to be able to cut out the UTF-8 tables in the latter when UTF-8
    support is not required. This saves 1.5-2K of code, which is important in
    some applications.

    Later: more #ifdefs are needed in pcre_ord2utf8.c and pcre_valid_utf8.c
    so as not to refer to the tables, even though these functions will never be
    called when UTF-8 support is disabled. Otherwise there are problems with a
    shared library.

10. Fixed two bugs in the emulated memmove() function in pcre_internal.h:

    (a) It was defining its arguments as char * instead of void *.

    (b) It was assuming that all moves were upwards in memory; this was true
        a long time ago when I wrote it, but is no longer the case.

    The emulated memmove() is provided for those environments that have neither
    memmove() nor bcopy(). I didn't think anyone used it these days, but that
    is clearly not the case, as these two bugs were recently reported.

11. The script PrepareRelease is now distributed: it calls 132html, CleanTxt,
    and Detrail to create the HTML documentation, the .txt form of the man
    pages, and it removes trailing spaces from listed files. It also creates
    pcre.h.generic and config.h.generic from pcre.h and config.h. In the latter
    case, it wraps all the #defines with #ifndefs. This script should be run
    before "make dist".

12. Fixed two fairly obscure bugs concerned with quantified caseless matching
    with Unicode property support.

    (a) For a maximizing quantifier, if the two different cases of the
        character were of different lengths in their UTF-8 codings (there are
        some cases like this - I found 11), and the matching function had to
        back up over a mixture of the two cases, it incorrectly assumed they
        were both the same length.

    (b) When PCRE was configured to use the heap rather than the stack for
        recursion during matching, it was not correctly preserving the data for
        the other case of a UTF-8 character when checking ahead for a match
        while processing a minimizing repeat. If the check also involved
        matching a wide character, but failed, corruption could cause an
        erroneous result when trying to check for a repeat of the original
        character.

13. Some tidying changes to the testing mechanism:

    (a) The RunTest script now detects the internal link size and whether there
        is UTF-8 and UCP support by running ./pcretest -C instead of relying on
        values substituted by "configure". (The RunGrepTest script already did
        this for UTF-8.) The configure.ac script no longer substitutes the
        relevant variables.

    (b) The debugging options /B and /D in pcretest show the compiled bytecode
        with length and offset values. This means that the output is different
        for different internal link sizes. Test 2 is skipped for link sizes
        other than 2 because of this, bypassing the problem. Unfortunately,
        there was also a test in test 3 (the locale tests) that used /B and
        failed for link sizes other than 2. Rather than cut the whole test out,
        I have added a new /Z option to pcretest that replaces the length and
        offset values with spaces. This is now used to make test 3 independent
        of link size. (Test 2 will be tidied up later.)

14. If erroroffset was passed as NULL to pcre_compile, it provoked a
    segmentation fault instead of returning the appropriate error message.

15. In multiline mode when the newline sequence was set to "any", the pattern
    ^$ would give a match between the \r and \n of a subject such as "A\r\nB".
    This doesn't seem right; it now treats the CRLF combination as the line
    ending, and so does not match in that case. It's only a pattern such as ^$
    that would hit this one: something like ^ABC$ would have failed after \r
    and then tried again after \r\n.

16. Changed the comparison command for RunGrepTest from "diff -u" to "diff -ub"
    in an attempt to make files that differ only in their line terminators
    compare equal. This works on Linux.

17. Under certain error circumstances pcregrep might try to free random memory
    as it exited. This is now fixed, thanks to valgrind.

19. In pcretest, if the pattern /(?m)^$/g<any> was matched against the string
    "abc\r\n\r\n", it found an unwanted second match after the second \r. This
    was because its rules for how to advance for /g after matching an empty
    string at the end of a line did not allow for this case. They now check for
    it specially.

20. pcretest is supposed to handle patterns and data of any length, by
    extending its buffers when necessary. It was getting this wrong when the
    buffer for a data line had to be extended.

21. Added PCRE_NEWLINE_ANYCRLF which is like ANY, but matches only CR, LF, or
    CRLF as a newline sequence.

22. Code for handling Unicode properties in pcre_dfa_exec() wasn't being cut
    out by #ifdef SUPPORT_UCP. This did no harm, as it could never be used, but
    I have nevertheless tidied it up.

23. Added some casts to kill warnings from HP-UX ia64 compiler.

24. Added a man page for pcre-config.


Version 7.0 19-Dec-06
---------------------

 1. Fixed a signed/unsigned compiler warning in pcre_compile.c, shown up by
    moving to gcc 4.1.1.

 2. The -S option for pcretest uses setrlimit(); I had omitted to #include
    sys/time.h, which is documented as needed for this function. It doesn't
    seem to matter on Linux, but it showed up on some releases of OS X.

 3. It seems that there are systems where bytes whose values are greater than
    127 match isprint() in the "C" locale. The "C" locale should be the
    default when a C program starts up. In most systems, only ASCII printing
    characters match isprint(). This difference caused the output from pcretest
    to vary, making some of the tests fail. I have changed pcretest so that:

    (a) When it is outputting text in the compiled version of a pattern, bytes
        other than 32-126 are always shown as hex escapes.

    (b) When it is outputting text that is a matched part of a subject string,
        it does the same, unless a different locale has been set for the match
        (using the /L modifier). In this case, it uses isprint() to decide.

 4. Fixed a major bug that caused incorrect computation of the amount of memory
    required for a compiled pattern when options that changed within the
    pattern affected the logic of the preliminary scan that determines the
    length. The relevant options are -x, and -i in UTF-8 mode. The result was
    that the computed length was too small. The symptoms of this bug were
    either the PCRE error "internal error: code overflow" from pcre_compile(),
    or a glibc crash with a message such as "pcretest: free(): invalid next
    size (fast)". Examples of patterns that provoked this bug (shown in
    pcretest format) are:

      /(?-x: )/x
      /(?x)(?-x: \s*#\s*)/
      /((?i)[\x{c0}])/8
      /(?i:[\x{c0}])/8

    HOWEVER: Change 17 below makes this fix obsolete as the memory computation
    is now done differently.

 5. Applied patches from Google to: (a) add a QuoteMeta function to the C++
    wrapper classes; (b) implement a new function in the C++ scanner that is
    more efficient than the old way of doing things because it avoids levels of
    recursion in the regex matching; (c) add a paragraph to the documentation
    for the FullMatch() function.

 6. The escape sequence \n was being treated as whatever was defined as
    "newline". Not only was this contrary to the documentation, which states
    that \n is character 10 (hex 0A), but it also went horribly wrong when
    "newline" was defined as CRLF. This has been fixed.

 7. In pcre_dfa_exec.c the value of an unsigned integer (the variable called c)
    was being set to -1 for the "end of line" case (supposedly a value that no
    character can have). Though this value is never used (the check for end of
    line is "zero bytes in current character"), it caused compiler complaints.
    I've changed it to 0xffffffff.

 8. In pcre_version.c, the version string was being built by a sequence of
    C macros that, in the event of PCRE_PRERELEASE being defined as an empty
    string (as it is for production releases) called a macro with an empty
    argument. The C standard says the result of this is undefined. The gcc
    compiler treats it as an empty string (which was what was wanted) but it is
    reported that Visual C gives an error. The source has been hacked around to
    avoid this problem.

 9. On the advice of a Windows user, included <io.h> and <fcntl.h> in Windows
    builds of pcretest, and changed the call to _setmode() to use _O_BINARY
    instead of 0x8000. Made all the #ifdefs test both _WIN32 and WIN32 (not all
    of them did).

10. Originally, pcretest opened its input and output without "b"; then I was
    told that "b" was needed in some environments, so it was added for release
    5.0 to both the input and output. (It makes no difference on Unix-like
    systems.) Later I was told that it is wrong for the input on Windows. I've
    now abstracted the modes into two macros, to make it easier to fiddle with
    them, and removed "b" from the input mode under Windows.

11. Added pkgconfig support for the C++ wrapper library, libpcrecpp.

12. Added -help and --help to pcretest as an official way of being reminded
    of the options.

13. Removed some redundant semicolons after macro calls in pcrecpparg.h.in
    and pcrecpp.cc because they annoy compilers at high warning levels.

14. A bit of tidying/refactoring in pcre_exec.c in the main bumpalong loop.

15. Fixed an occurrence of == in configure.ac that should have been = (shell
    scripts are not C programs :-) and which was not noticed because it works
    on Linux.

16. pcretest is supposed to handle any length of pattern and data line (as one
    line or as a continued sequence of lines) by extending its input buffer if
    necessary. This feature was broken for very long pattern lines, leading to
    a string of junk being passed to pcre_compile() if the pattern was longer
    than about 50K.

17. I have done a major re-factoring of the way pcre_compile() computes the
    amount of memory needed for a compiled pattern. Previously, there was code
    that made a preliminary scan of the pattern in order to do this. That was
    OK when PCRE was new, but as the facilities have expanded, it has become
    harder and harder to keep it in step with the real compile phase, and there
    have been a number of bugs (see for example, 4 above). I have now found a
    cunning way of running the real compile function in a "fake" mode that
    enables it to compute how much memory it would need, while actually only
    ever using a few hundred bytes of working memory and without too many
    tests of the mode. This should make future maintenance and development
    easier. A side effect of this work is that the limit of 200 on the nesting
    depth of parentheses has been removed (though this was never a serious
    limitation, I suspect). However, there is a downside: pcre_compile() now
    runs more slowly than before (30% or more, depending on the pattern). I
    hope this isn't a big issue. There is no effect on runtime performance.

18. Fixed a minor bug in pcretest: if a pattern line was not terminated by a
    newline (only possible for the last line of a file) and it was a
    pattern that set a locale (followed by /Lsomething), pcretest crashed.

19. Added additional timing features to pcretest. (1) The -tm option now times
    matching only, not compiling. (2) Both -t and -tm can be followed, as a
    separate command line item, by a number that specifies the number of
    repeats to use when timing. The default is 50000; this gives better
    precision, but takes uncomfortably long for very large patterns.

20. Extended pcre_study() to be more clever in cases where a branch of a
    subpattern has no definite first character. For example, (a*|b*)[cd] would
    previously give no result from pcre_study(). Now it recognizes that the
    first character must be a, b, c, or d.

21. There was an incorrect error "recursive call could loop indefinitely" if
    a subpattern (or the entire pattern) that was being tested for matching an
    empty string contained only one non-empty item after a nested subpattern.
    For example, the pattern (?>\x{100}*)\d(?R) provoked this error
    incorrectly, because the \d was being skipped in the check.

22. The pcretest program now has a new pattern option /B and a command line
    option -b, which is equivalent to adding /B to every pattern. This causes
    it to show the compiled bytecode, without the additional information that
    -d shows. The effect of -d is now the same as -b with -i (and similarly, /D
    is the same as /B/I).

23. A new optimization is now able automatically to treat some sequences such
    as a*b as a*+b. More specifically, if something simple (such as a character
    or a simple class like \d) has an unlimited quantifier, and is followed by
    something that cannot possibly match the quantified thing, the quantifier
    is automatically "possessified".

24. A recursive reference to a subpattern whose number was greater than 39
    went wrong under certain circumstances in UTF-8 mode. This bug could also
    have affected the operation of pcre_study().

25. Realized that a little bit of performance could be had by replacing
    (c & 0xc0) == 0xc0 with c >= 0xc0 when processing UTF-8 characters.

26. Timing data from pcretest is now shown to 4 decimal places instead of 3.

27. Possessive quantifiers such as a++ were previously implemented by turning
    them into atomic groups such as (?>a+). Now they have their own opcodes,
    which improves performance. This includes the automatically created ones
    from 23 above.

28. A pattern such as (?=(\w+))\1: which simulates an atomic group using a
    lookahead was broken if it was not anchored. PCRE was mistakenly expecting
    the first matched character to be a colon. This applied both to named and
    numbered groups.

29. The ucpinternal.h header file was missing its idempotency #ifdef.

30. I was sent a "project" file called libpcre.a.dev which I understand makes
    building PCRE on Windows easier, so I have included it in the distribution.

31. There is now a check in pcretest against a ridiculously large number being
    returned by pcre_exec() or pcre_dfa_exec(). If this happens in a /g or /G
    loop, the loop is abandoned.

32. Forward references to subpatterns in conditions such as (?(2)...) where
    subpattern 2 is defined later cause pcre_compile() to search forwards in
    the pattern for the relevant set of parentheses. This search went wrong
    when there were unescaped parentheses in a character class, parentheses
    escaped with \Q...\E, or parentheses in a #-comment in /x mode.

33. "Subroutine" calls and backreferences were previously restricted to
    referencing subpatterns earlier in the regex. This restriction has now
    been removed.

34. Added a number of extra features that are going to be in Perl 5.10. On the
    whole, these are just syntactic alternatives for features that PCRE had
    previously implemented using the Python syntax or my own invention. The
    other formats are all retained for compatibility.

    (a) Named groups can now be defined as (?<name>...) or (?'name'...) as well
        as (?P<name>...). The new forms, as well as being in Perl 5.10, are
        also .NET compatible.

    (b) A recursion or subroutine call to a named group can now be defined as
        (?&name) as well as (?P>name).

    (c) A backreference to a named group can now be defined as \k<name> or
        \k'name' as well as (?P=name). The new forms, as well as being in Perl
        5.10, are also .NET compatible.

    (d) A conditional reference to a named group can now use the syntax
        (?(<name>) or (?('name') as well as (?(name).

    (e) A "conditional group" of the form (?(DEFINE)...) can be used to define
        groups (named and numbered) that are never evaluated inline, but can be
        called as "subroutines" from elsewhere. In effect, the DEFINE condition
        is always false. There may be only one alternative in such a group.

    (f) A test for recursion can be given as (?(R1)... or (?(R&name)... as well
        as the simple (?(R). The condition is true only if the most recent
        recursion is that of the given number or name. It does not search out
        through the entire recursion stack.

    (g) The escape \gN or \g{N} has been added, where N is a positive or
        negative number, specifying an absolute or relative reference.

35. Tidied to get rid of some further signed/unsigned compiler warnings and
    some "unreachable code" warnings.

36. Updated the Unicode property tables to Unicode version 5.0.0. Amongst other
    things, this adds five new scripts.

37. Perl ignores orphaned \E escapes completely. PCRE now does the same.
    There were also incompatibilities regarding the handling of \Q..\E inside
    character classes, for example with patterns like [\Qa\E-\Qz\E] where the
    hyphen was adjacent to \Q or \E. I hope I've cleared all this up now.

38. Like Perl, PCRE detects when an indefinitely repeated parenthesized group
    matches an empty string, and forcibly breaks the loop. There were bugs in
    this code in non-simple cases. For a pattern such as  ^(a()*)*  matched
    against  aaaa  the result was just "a" rather than "aaaa", for example. Two
    separate and independent bugs (that affected different cases) have been
    fixed.

39. Refactored the code to abolish the use of different opcodes for small
    capturing bracket numbers. This is a tidy that I avoided doing when I
    removed the limit on the number of capturing brackets for 3.5 back in 2001.
    The new approach is not only tidier, it makes it possible to reduce the
    memory needed to fix the previous bug (38).

40. Implemented PCRE_NEWLINE_ANY to recognize any of the Unicode newline
    sequences (http://unicode.org/unicode/reports/tr18/) as "newline" when
    processing dot, circumflex, or dollar metacharacters, or #-comments in /x
    mode.

41. Add \R to match any Unicode newline sequence, as suggested in the Unicode
    report.

42. Applied patch, originally from Ari Pollak, modified by Google, to allow
    copy construction and assignment in the C++ wrapper.

43. Updated pcregrep to support "--newline=any". In the process, I fixed a
    couple of bugs that could have given wrong results in the "--newline=crlf"
    case.

44. Added a number of casts and did some reorganization of signed/unsigned int
    variables following suggestions from Dair Grant. Also renamed the variable
    "this" as "item" because it is a C++ keyword.

45. Arranged for dftables to add

      #include "pcre_internal.h"

    to pcre_chartables.c because without it, gcc 4.x may remove the array
    definition from the final binary if PCRE is built into a static library and
    dead code stripping is activated.

46. For an unanchored pattern, if a match attempt fails at the start of a
    newline sequence, and the newline setting is CRLF or ANY, and the next two
    characters are CRLF, advance by two characters instead of one.


Version 6.7 04-Jul-06
---------------------

 1. In order to handle tests when input lines are enormously long, pcretest has
    been re-factored so that it automatically extends its buffers when
    necessary. The code is crude, but this _is_ just a test program. The
    default size has been increased from 32K to 50K.

 2. The code in pcre_study() was using the value of the re argument before
    testing it for NULL. (Of course, in any sensible call of the function, it
    won't be NULL.)

 3. The memmove() emulation function in pcre_internal.h, which is used on
    systems that lack both memmove() and bcopy() - that is, hardly ever -
    was missing a "static" storage class specifier.

 4. When UTF-8 mode was not set, PCRE looped when compiling certain patterns
    containing an extended class (one that cannot be represented by a bitmap
    because it contains high-valued characters or Unicode property items, e.g.
    [\pZ]). Almost always one would set UTF-8 mode when processing such a
    pattern, but PCRE should not loop if you do not (it no longer does).
    [Detail: two cases were found: (a) a repeated subpattern containing an
    extended class; (b) a recursive reference to a subpattern that followed a
    previous extended class. It wasn't skipping over the extended class
    correctly when UTF-8 mode was not set.]

 5. A negated single-character class was not being recognized as fixed-length
    in lookbehind assertions such as (?<=[^f]), leading to an incorrect
    compile error "lookbehind assertion is not fixed length".

 6. The RunPerlTest auxiliary script was showing an unexpected difference
    between PCRE and Perl for UTF-8 tests. It turns out that it is hard to
    write a Perl script that can interpret lines of an input file either as
    byte characters or as UTF-8, which is what "perltest" was being required to
    do for the non-UTF-8 and UTF-8 tests, respectively. Essentially what you
    can't do is switch easily at run time between having the "use utf8;" pragma
    or not. In the end, I fudged it by using the RunPerlTest script to insert
    "use utf8;" explicitly for the UTF-8 tests.

 7. In multiline (/m) mode, PCRE was matching ^ after a terminating newline at
    the end of the subject string, contrary to the documentation and to what
    Perl does. This was true of both matching functions. Now it matches only at
    the start of the subject and immediately after *internal* newlines.

 8. A call of pcre_fullinfo() from pcretest to get the option bits was passing
    a pointer to an int instead of a pointer to an unsigned long int. This
    caused problems on 64-bit systems.

 9. Applied a patch from the folks at Google to pcrecpp.cc, to fix "another
    instance of the 'standard' template library not being so standard".

10. There was no check on the number of named subpatterns nor the maximum
    length of a subpattern name. The product of these values is used to compute
    the size of the memory block for a compiled pattern. By supplying a very
    long subpattern name and a large number of named subpatterns, the size
    computation could be caused to overflow. This is now prevented by limiting
    the length of names to 32 characters, and the number of named subpatterns
    to 10,000.

11. Subpatterns that are repeated with specific counts have to be replicated in
    the compiled pattern. The size of memory for this was computed from the
    length of the subpattern and the repeat count. The latter is limited to
    65535, but there was no limit on the former, meaning that integer overflow
    could in principle occur. The compiled length of a repeated subpattern is
    now limited to 30,000 bytes in order to prevent this.

12. Added the optional facility to have named substrings with the same name.

13. Added the ability to use a named substring as a condition, using the
    Python syntax: (?(name)yes|no). This overloads (?(R)... and names that
    are numbers (not recommended). Forward references are permitted.

14. Added forward references in named backreferences (if you see what I mean).

15. In UTF-8 mode, with the PCRE_DOTALL option set, a quantified dot in the
    pattern could run off the end of the subject. For example, the pattern
    "(?s)(.{1,5})"8 did this with the subject "ab".

16. If PCRE_DOTALL or PCRE_MULTILINE were set, pcre_dfa_exec() behaved as if
    PCRE_CASELESS was set when matching characters that were quantified with ?
    or *.

17. A character class other than a single negated character that had a minimum
    but no maximum quantifier - for example [ab]{6,} - was not handled
    correctly by pcre_dfa_exec(). It would match only one character.

18. A valid (though odd) pattern that looked like a POSIX character
    class but used an invalid character after [ (for example [[,abc,]]) caused
    pcre_compile() to give the error "Failed: internal error: code overflow" or
    in some cases to crash with a glibc free() error. This could even happen if
    the pattern terminated after [[ but there just happened to be a sequence of
    letters, a binary zero, and a closing ] in the memory that followed.

19. Perl's treatment of octal escapes in the range \400 to \777 has changed
    over the years. Originally (before any Unicode support), just the bottom 8
    bits were taken. Thus, for example, \500 really meant \100. Nowadays the
    output from "man perlunicode" includes this:

      The regular expression compiler produces polymorphic opcodes.  That
      is, the pattern adapts to the data and automatically switches to
      the Unicode character scheme when presented with Unicode data--or
      instead uses a traditional byte scheme when presented with byte
      data.

    Sadly, a wide octal escape does not cause a switch, and in a string with
    no other multibyte characters, these octal escapes are treated as before.
    Thus, in Perl, the pattern  /\500/ actually matches \100 but the pattern
    /\500|\x{1ff}/ matches \500 or \777 because the whole thing is treated as a
    Unicode string.

    I have not perpetrated such confusion in PCRE. Up till now, it took just
    the bottom 8 bits, as in old Perl. I have now made octal escapes with
    values greater than \377 illegal in non-UTF-8 mode. In UTF-8 mode they
    translate to the appropriate multibyte character.

20. Applied some refactoring to reduce the number of warnings from Microsoft
    and Borland compilers. This has included removing the fudge introduced
    seven years ago for the OS/2 compiler (see 2.02/2 below) because it caused
    a warning about an unused variable.

21. PCRE has not included VT (character 0x0b) in the set of whitespace
    characters since release 4.0, because Perl (from release 5.004) does not.
    [Or at least, is documented not to: some releases seem to be in conflict
    with the documentation.] However, when a pattern was studied with
    pcre_study() and all its branches started with \s, PCRE still included VT
    as a possible starting character. Of course, this did no harm; it just
    caused an unnecessary match attempt.

22. Removed a now-redundant internal flag bit that recorded the fact that case
    dependency changed within the pattern. This was once needed for "required
    byte" processing, but is no longer used. This recovers a now-scarce options
    bit. Also moved the least significant internal flag bit to the most-
    significant bit of the word, which was not previously used (hangover from
    the days when it was an int rather than a uint) to free up another bit for
    the future.

23. Added support for CRLF line endings as well as CR and LF. As well as the
    default being selectable at build time, it can now be changed at runtime
    via the PCRE_NEWLINE_xxx flags. There are now options for pcregrep to
    specify that it is scanning data with non-default line endings.

24. Changed the definition of CXXLINK to make it agree with the definition of
    LINK in the Makefile, by replacing LDFLAGS with CXXFLAGS.

25. Applied Ian Taylor's patches to avoid using another stack frame for tail
    recursions. This makes a big difference to stack usage for some patterns.

26. If a subpattern containing a named recursion or subroutine reference such
    as (?P>B) was quantified, for example (xxx(?P>B)){3}, the calculation of
    the space required for the compiled pattern went wrong and gave too small a
    value. Depending on the environment, this could lead to "Failed: internal
    error: code overflow at offset 49" or "glibc detected double free or
    corruption" errors.

27. Applied patches from Google (a) to support the new newline modes and (b) to
    advance over multibyte UTF-8 characters in GlobalReplace.

28. Change free() to pcre_free() in pcredemo.c. Apparently this makes a
    difference for some implementation of PCRE in some Windows version.

29. Added some extra testing facilities to pcretest:

    \q<number>   in a data line sets the "match limit" value
    \Q<number>   in a data line sets the "match recursion limit" value
    -S <number>  sets the stack size, where <number> is in megabytes

    The -S option isn't available for Windows.


Version 6.6 06-Feb-06
---------------------

 1. Change 16(a) for 6.5 broke things, because PCRE_DATA_SCOPE was not defined
    in pcreposix.h. I have copied the definition from pcre.h.

 2. Change 25 for 6.5 broke compilation in a build directory out-of-tree
    because pcre.h is no longer a built file.

 3. Added Jeff Friedl's additional debugging patches to pcregrep. These are
    not normally included in the compiled code.


Version 6.5 01-Feb-06
---------------------

 1. When using the partial match feature with pcre_dfa_exec(), it was not
    anchoring the second and subsequent partial matches at the new starting
    point. This could lead to incorrect results. For example, with the pattern
    /1234/, partially matching against "123" and then "a4" gave a match.

 2. Changes to pcregrep:

    (a) All non-match returns from pcre_exec() were being treated as failures
        to match the line. Now, unless the error is PCRE_ERROR_NOMATCH, an
        error message is output. Some extra information is given for the
        PCRE_ERROR_MATCHLIMIT and PCRE_ERROR_RECURSIONLIMIT errors, which are
        probably the only errors that are likely to be caused by users (by
        specifying a regex that has nested indefinite repeats, for instance).
        If there are more than 20 of these errors, pcregrep is abandoned.

    (b) A binary zero was treated as data while matching, but terminated the
        output line if it was written out. This has been fixed: binary zeroes
        are now no different to any other data bytes.

    (c) Whichever of the LC_ALL or LC_CTYPE environment variables is set is
        used to set a locale for matching. The --locale=xxxx long option has
        been added (no short equivalent) to specify a locale explicitly on the
        pcregrep command, overriding the environment variables.

    (d) When -B was used with -n, some line numbers in the output were one less
        than they should have been.

    (e) Added the -o (--only-matching) option.

    (f) If -A or -C was used with -c (count only), some lines of context were
        accidentally printed for the final match.

    (g) Added the -H (--with-filename) option.

    (h) The combination of options -rh failed to suppress file names for files
        that were found from directory arguments.

    (i) Added the -D (--devices) and -d (--directories) options.

    (j) Added the -F (--fixed-strings) option.

    (k) Allow "-" to be used as a file name for -f as well as for a data file.

    (l) Added the --colo(u)r option.

    (m) Added Jeffrey Friedl's -S testing option, but within #ifdefs so that it
        is not present by default.

 3. A nasty bug was discovered in the handling of recursive patterns, that is,
    items such as (?R) or (?1), when the recursion could match a number of
    alternatives. If it matched one of the alternatives, but subsequently,
    outside the recursion, there was a failure, the code tried to back up into
    the recursion. However, because of the way PCRE is implemented, this is not
    possible, and the result was an incorrect result from the match.

    In order to prevent this happening, the specification of recursion has
    been changed so that all such subpatterns are automatically treated as
    atomic groups. Thus, for example, (?R) is treated as if it were (?>(?R)).

 4. I had overlooked the fact that, in some locales, there are characters for
    which isalpha() is true but neither isupper() nor islower() are true. In
    the fr_FR locale, for instance, the \xAA and \xBA characters (ordmasculine
    and ordfeminine) are like this. This affected the treatment of \w and \W
    when they appeared in character classes, but not when they appeared outside
    a character class. The bit map for "word" characters is now created
    separately from the results of isalnum() instead of just taking it from the
    upper, lower, and digit maps. (Plus the underscore character, of course.)

 5. The above bug also affected the handling of POSIX character classes such as
    [[:alpha:]] and [[:alnum:]]. These do not have their own bit maps in PCRE's
    permanent tables. Instead, the bit maps for such a class were previously
    created as the appropriate unions of the upper, lower, and digit bitmaps.
    Now they are created by subtraction from the [[:word:]] class, which has
    its own bitmap.

 6. The [[:blank:]] character class matches horizontal, but not vertical space.
    It is created by subtracting the vertical space characters (\x09, \x0a,
    \x0b, \x0c) from the [[:space:]] bitmap. Previously, however, the
    subtraction was done in the overall bitmap for a character class, meaning
    that a class such as [\x0c[:blank:]] was incorrect because \x0c would not
    be recognized. This bug has been fixed.

 7. Patches from the folks at Google:

      (a) pcrecpp.cc: "to handle a corner case that may or may not happen in
      real life, but is still worth protecting against".

      (b) pcrecpp.cc: "corrects a bug when negative radixes are used with
      regular expressions".

      (c) pcre_scanner.cc: avoid use of std::count() because not all systems
      have it.

      (d) Split off pcrecpparg.h from pcrecpp.h and had the former built by
      "configure" and the latter not, in order to fix a problem somebody had
      with compiling the Arg class on HP-UX.

      (e) Improve the error-handling of the C++ wrapper a little bit.

      (f) New tests for checking recursion limiting.

 8. The pcre_memmove() function, which is used only if the environment does not
    have a standard memmove() function (and is therefore rarely compiled),
    contained two bugs: (a) use of int instead of size_t, and (b) it was not
    returning a result (though PCRE never actually uses the result).

 9. In the POSIX regexec() interface, if nmatch is specified as a ridiculously
    large number - greater than INT_MAX/(3*sizeof(int)) - REG_ESPACE is
    returned instead of calling malloc() with an overflowing number that would
    most likely cause subsequent chaos.

10. The debugging option of pcretest was not showing the NO_AUTO_CAPTURE flag.

11. The POSIX flag REG_NOSUB is now supported. When a pattern that was compiled
    with this option is matched, the nmatch and pmatch options of regexec() are
    ignored.

12. Added REG_UTF8 to the POSIX interface. This is not defined by POSIX, but is
    provided in case anyone wants to use the POSIX interface with UTF-8
    strings.

13. Added CXXLDFLAGS to the Makefile parameters to provide settings only on the
    C++ linking (needed for some HP-UX environments).

14. Avoid compiler warnings in get_ucpname() when compiled without UCP support
    (unused parameter) and in the pcre_printint() function (omitted "default"
    switch label when the default is to do nothing).

15. Added some code to make it possible, when PCRE is compiled as a C++
    library, to replace subject pointers for pcre_exec() with a smart pointer
    class, thus making it possible to process discontinuous strings.

16. The two macros PCRE_EXPORT and PCRE_DATA_SCOPE are confusing, and perform
    much the same function. They were added by different people who were trying
    to make PCRE easy to compile on non-Unix systems. It has been suggested
    that PCRE_EXPORT be abolished now that there is more automatic apparatus
    for compiling on Windows systems. I have therefore replaced it with
    PCRE_DATA_SCOPE. This is set automatically for Windows; if not set it
    defaults to "extern" for C or "extern C" for C++, which works fine on
    Unix-like systems. It is now possible to override the value of
    PCRE_DATA_SCOPE with something explicit in config.h. In addition:

    (a) pcreposix.h still had just "extern" instead of either of these macros;
        I have replaced it with PCRE_DATA_SCOPE.

    (b) Functions such as _pcre_xclass(), which are internal to the library,
        but external in the C sense, all had PCRE_EXPORT in their definitions.
        This is apparently wrong for the Windows case, so I have removed it.
        (It makes no difference on Unix-like systems.)

17. Added a new limit, MATCH_LIMIT_RECURSION, which limits the depth of nesting
    of recursive calls to match(). This is different to MATCH_LIMIT because
    that limits the total number of calls to match(), not all of which increase
    the depth of recursion. Limiting the recursion depth limits the amount of
    stack (or heap if NO_RECURSE is set) that is used. The default can be set
    when PCRE is compiled, and changed at run time. A patch from Google adds
    this functionality to the C++ interface.

18. Changes to the handling of Unicode character properties:

    (a) Updated the table to Unicode 4.1.0.

    (b) Recognize characters that are not in the table as "Cn" (undefined).

    (c) I revised the way the table is implemented to a much improved format
        which includes recognition of ranges. It now supports the ranges that
        are defined in UnicodeData.txt, and it also amalgamates other
        characters into ranges. This has reduced the number of entries in the
        table from around 16,000 to around 3,000, thus reducing its size
        considerably. I realized I did not need to use a tree structure after
        all - a binary chop search is just as efficient. Having reduced the
        number of entries, I extended their size from 6 bytes to 8 bytes to
        allow for more data.

    (d) Added support for Unicode script names via properties such as \p{Han}.

19. In UTF-8 mode, a backslash followed by a non-Ascii character was not
    matching that character.

20. When matching a repeated Unicode property with a minimum greater than zero,
    (for example \pL{2,}), PCRE could look past the end of the subject if it
    reached it while seeking the minimum number of characters. This could
    happen only if some of the characters were more than one byte long, because
    there is a check for at least the minimum number of bytes.

21. Refactored the implementation of \p and \P so as to be more general, to
    allow for more different types of property in future. This has changed the
    compiled form incompatibly. Anybody with saved compiled patterns that use
    \p or \P will have to recompile them.

22. Added "Any" and "L&" to the supported property types.

23. Recognize \x{...} as a code point specifier, even when not in UTF-8 mode,
    but give a compile time error if the value is greater than 0xff.

24. The man pages for pcrepartial, pcreprecompile, and pcre_compile2 were
    accidentally not being installed or uninstalled.

25. The pcre.h file was built from pcre.h.in, but the only changes that were
    made were to insert the current release number. This seemed silly, because
    it made things harder for people building PCRE on systems that don't run
    "configure". I have turned pcre.h into a distributed file, no longer built
    by "configure", with the version identification directly included. There is
    no longer a pcre.h.in file.

    However, this change necessitated a change to the pcre-config script as
    well. It is built from pcre-config.in, and one of the substitutions was the
    release number. I have updated configure.ac so that ./configure now finds
    the release number by grepping pcre.h.

26. Added the ability to run the tests under valgrind.


Version 6.4 05-Sep-05
---------------------

 1. Change 6.0/10/(l) to pcregrep introduced a bug that caused separator lines
    "--" to be printed when multiple files were scanned, even when none of the
    -A, -B, or -C options were used. This is not compatible with Gnu grep, so I
    consider it to be a bug, and have restored the previous behaviour.

 2. A couple of code tidies to get rid of compiler warnings.

 3. The pcretest program used to cheat by referring to symbols in the library
    whose names begin with _pcre_. These are internal symbols that are not
    really supposed to be visible externally, and in some environments it is
    possible to suppress them. The cheating is now confined to including
    certain files from the library's source, which is a bit cleaner.

 4. Renamed pcre.in as pcre.h.in to go with pcrecpp.h.in; it also makes the
    file's purpose clearer.

 5. Reorganized pcre_ucp_findchar().


Version 6.3 15-Aug-05
---------------------

 1. The file libpcre.pc.in did not have general read permission in the tarball.

 2. There were some problems when building without C++ support:

    (a) If C++ support was not built, "make install" and "make test" still
        tried to test it.

    (b) There were problems when the value of CXX was explicitly set. Some
        changes have been made to try to fix these, and ...

    (c) --disable-cpp can now be used to explicitly disable C++ support.

    (d) The use of @CPP_OBJ@ directly caused a blank line preceded by a
        backslash in a target when C++ was disabled. This confuses some
        versions of "make", apparently. Using an intermediate variable solves
        this. (Same for CPP_LOBJ.)

 3. $(LINK_FOR_BUILD) now includes $(CFLAGS_FOR_BUILD) and $(LINK)
    (non-Windows) now includes $(CFLAGS) because these flags are sometimes
    necessary on certain architectures.

 4. Added a setting of -export-symbols-regex to the link command to remove
    those symbols that are exported in the C sense, but actually are local
    within the library, and not documented. Their names all begin with
    "_pcre_". This is not a perfect job, because (a) we have to except some
    symbols that pcretest ("illegally") uses, and (b) the facility isn't always
    available (and never for static libraries). I have made a note to try to
    find a way round (a) in the future.


Version 6.2 01-Aug-05
---------------------

 1. There was no test for integer overflow of quantifier values. A construction
    such as {1111111111111111} would give undefined results. What is worse, if
    a minimum quantifier for a parenthesized subpattern overflowed and became
    negative, the calculation of the memory size went wrong. This could have
    led to memory overwriting.

 2. Building PCRE using VPATH was broken. Hopefully it is now fixed.

 3. Added "b" to the 2nd argument of fopen() in dftables.c, for non-Unix-like
    operating environments where this matters.

 4. Applied Giuseppe Maxia's patch to add additional features for controlling
    PCRE options from within the C++ wrapper.

 5. Named capturing subpatterns were not being correctly counted when a pattern
    was compiled. This caused two problems: (a) If there were more than 100
    such subpatterns, the calculation of the memory needed for the whole
    compiled pattern went wrong, leading to an overflow error. (b) Numerical
    back references of the form \12, where the number was greater than 9, were
    not recognized as back references, even though there were sufficient
    previous subpatterns.

 6. Two minor patches to pcrecpp.cc in order to allow it to compile on older
    versions of gcc, e.g. 2.95.4.


Version 6.1 21-Jun-05
---------------------

 1. There was one reference to the variable "posix" in pcretest.c that was not
    surrounded by "#if !defined NOPOSIX".

 2. Make it possible to compile pcretest without DFA support, UTF8 support, or
    the cross-check on the old pcre_info() function, for the benefit of the
    cut-down version of PCRE that is currently imported into Exim.

 3. A (silly) pattern starting with (?i)(?-i) caused an internal space
    allocation error. I've done the easy fix, which wastes 2 bytes for sensible
    patterns that start (?i) but I don't think that matters. The use of (?i) is
    just an example; this all applies to the other options as well.

 4. Since libtool seems to echo the compile commands it is issuing, the output
    from "make" can be reduced a bit by putting "@" in front of each libtool
    compile command.

 5. Patch from the folks at Google for configure.in to be a bit more thorough
    in checking for a suitable C++ installation before trying to compile the
    C++ stuff. This should fix a reported problem when a compiler was present,
    but no suitable headers.

 6. The man pages all had just "PCRE" as their title. I have changed them to
    be the relevant file name. I have also arranged that these names are
    retained in the file doc/pcre.txt, which is a concatenation in text format
    of all the man pages except the little individual ones for each function.

 7. The NON-UNIX-USE file had not been updated for the different set of source
    files that come with release 6. I also added a few comments about the C++
    wrapper.


Version 6.0 07-Jun-05
---------------------

 1. Some minor internal re-organization to help with my DFA experiments.

 2. Some missing #ifdef SUPPORT_UCP conditionals in pcretest and printint that
    didn't matter for the library itself when fully configured, but did matter
    when compiling without UCP support, or within Exim, where the ucp files are
    not imported.

 3. Refactoring of the library code to split up the various functions into
    different source modules. The addition of the new DFA matching code (see
    below) to a single monolithic source would have made it really too
    unwieldy, quite apart from causing all the code to be included in a
    statically linked application, when only some functions are used. This is
    relevant even without the DFA addition now that patterns can be compiled in
    one application and matched in another.

    The downside of splitting up is that there have to be some external
    functions and data tables that are used internally in different modules of
    the library but which are not part of the API. These have all had their
    names changed to start with "_pcre_" so that they are unlikely to clash
    with other external names.

 4. Added an alternate matching function, pcre_dfa_exec(), which matches using
    a different (DFA) algorithm. Although it is slower than the original
    function, it does have some advantages for certain types of matching
    problem.

 5. Upgrades to pcretest in order to test the features of pcre_dfa_exec(),
    including restarting after a partial match.

 6. A patch for pcregrep that defines INVALID_FILE_ATTRIBUTES if it is not
    defined when compiling for Windows was sent to me. I have put it into the
    code, though I have no means of testing or verifying it.

 7. Added the pcre_refcount() auxiliary function.

 8. Added the PCRE_FIRSTLINE option. This constrains an unanchored pattern to
    match before or at the first newline in the subject string. In pcretest,
    the /f option on a pattern can be used to set this.

 9. A repeated \w when used in UTF-8 mode with characters greater than 256
    would behave wrongly. This has been present in PCRE since release 4.0.

10. A number of changes to the pcregrep command:

    (a) Refactored how -x works; insert ^(...)$ instead of setting
        PCRE_ANCHORED and checking the length, in preparation for adding
        something similar for -w.

    (b) Added the -w (match as a word) option.

    (c) Refactored the way lines are read and buffered so as to have more
        than one at a time available.

    (d) Implemented a pcregrep test script.

    (e) Added the -M (multiline match) option. This allows patterns to match
        over several lines of the subject. The buffering ensures that at least
        8K, or the rest of the document (whichever is the shorter) is available
        for matching (and similarly the previous 8K for lookbehind assertions).

    (f) Changed the --help output so that it now says

          -w, --word-regex(p)

        instead of two lines, one with "regex" and the other with "regexp"
        because that confused at least one person since the short forms are the
        same. (This required a bit of code, as the output is generated
        automatically from a table. It wasn't just a text change.)

    (g) -- can be used to terminate pcregrep options if the next thing isn't an
        option but starts with a hyphen. Could be a pattern or a path name
        starting with a hyphen, for instance.

    (h) "-" can be given as a file name to represent stdin.

    (i) When file names are being printed, "(standard input)" is used for
        the standard input, for compatibility with GNU grep. Previously
        "<stdin>" was used.

    (j) The option --label=xxx can be used to supply a name to be used for
        stdin when file names are being printed. There is no short form.

    (k) Re-factored the options decoding logic because we are going to add
        two more options that take data. Such options can now be given in four
        different ways, e.g. "-fname", "-f name", "--file=name", "--file name".

    (l) Added the -A, -B, and -C options for requesting that lines of context
        around matches be printed.

    (m) Added the -L option to print the names of files that do not contain
        any matching lines, that is, the complement of -l.

    (n) The return code is 2 if any file cannot be opened, but pcregrep does
        continue to scan other files.

    (o) The -s option was incorrectly implemented. For compatibility with other
        greps, it now suppresses the error message for a non-existent or non-
        accessible file (but not the return code). There is a new option called
        -q that suppresses the output of matching lines, which was what -s was
        previously doing.

    (p) Added --include and --exclude options to specify files for inclusion
        and exclusion when recursing.

11. The Makefile was not using the Autoconf-supported LDFLAGS macro properly.
    Hopefully, it now does.

12. Missing cast in pcre_study().

13. Added an "uninstall" target to the makefile.

14. Replaced "extern" in the function prototypes in Makefile.in with
    "PCRE_DATA_SCOPE", which defaults to 'extern' or 'extern "C"' in the Unix
    world, but is set differently for Windows.

15. Added a second compiling function called pcre_compile2(). The only
    difference is that it has an extra argument, which is a pointer to an
    integer error code. When there is a compile-time failure, this is set
    non-zero, in addition to the error text pointer being set to point to an
    error message. The new argument may be NULL if no error number is required
    (but then you may as well call pcre_compile(), which is now just a
    wrapper). This facility is provided because some applications need a
    numeric error indication, but it has also enabled me to tidy up the way
    compile-time errors are handled in the POSIX wrapper.

16. Added VPATH=.libs to the makefile; this should help when building with one
    prefix path and installing with another. (Or so I'm told by someone who
    knows more about this stuff than I do.)

17. Added a new option, REG_DOTALL, to the POSIX function regcomp(). This
    passes PCRE_DOTALL to the pcre_compile() function, making the "." character
    match everything, including newlines. This is not POSIX-compatible, but
    somebody wanted the feature. From pcretest it can be activated by using
    both the P and the s flags.

18. AC_PROG_LIBTOOL appeared twice in Makefile.in. Removed one.

19. libpcre.pc was being incorrectly installed as executable.

20. A couple of places in pcretest check for end-of-line by looking for '\n';
    it now also looks for '\r' so that it will work unmodified on Windows.

21. Added Google's contributed C++ wrapper to the distribution.

22. Added some untidy missing memory free() calls in pcretest, to keep
    Electric Fence happy when testing.



Version 5.0 13-Sep-04
---------------------

 1. Internal change: literal characters are no longer packed up into items
    containing multiple characters in a single byte-string. Each character
    is now matched using a separate opcode. However, there may be more than one
    byte in the character in UTF-8 mode.

 2. The pcre_callout_block structure has two new fields: pattern_position and
    next_item_length. These contain the offset in the pattern to the next match
    item, and its length, respectively.

 3. The PCRE_AUTO_CALLOUT option for pcre_compile() requests the automatic
    insertion of callouts before each pattern item. Added the /C option to
    pcretest to make use of this.

 4. On the advice of a Windows user, the lines

      #if defined(_WIN32) || defined(WIN32)
      _setmode( _fileno( stdout ), 0x8000 );
      #endif  /* defined(_WIN32) || defined(WIN32) */

    have been added to the source of pcretest. This apparently does useful
    magic in relation to line terminators.

 5. Changed "r" and "w" in the calls to fopen() in pcretest to "rb" and "wb"
    for the benefit of those environments where the "b" makes a difference.

 6. The icc compiler has the same options as gcc, but "configure" doesn't seem
    to know about it. I have put a hack into configure.in that adds in code
    to set GCC=yes if CC=icc. This seems to end up at a point in the
    generated configure script that is early enough to affect the setting of
    compiler options, which is what is needed, but I have no means of testing
    whether it really works. (The user who reported this had patched the
    generated configure script, which of course I cannot do.)

    LATER: After change 22 below (new libtool files), the configure script
    seems to know about icc (and also ecc). Therefore, I have commented out
    this hack in configure.in.

 7. Added support for pkg-config (2 patches were sent in).

 8. Negated POSIX character classes that used a combination of internal tables
    were completely broken. These were [[:^alpha:]], [[:^alnum:]], and
    [[:^ascii:]]. Typically, they would match almost any characters. The other
    POSIX classes were not broken in this way.

 9. Matching the pattern "\b.*?" against "ab cd", starting at offset 1, failed
    to find the match, as PCRE was deluded into thinking that the match had to
    start at the start point or following a newline. The same bug applied to
    patterns with negative forward assertions or any backward assertions
    preceding ".*" at the start, unless the pattern required a fixed first
    character. This was a failing pattern: "(?!.bcd).*". The bug is now fixed.

10. In UTF-8 mode, when moving forwards in the subject after a failed match
    starting at the last subject character, bytes beyond the end of the subject
    string were read.

11. Renamed the variable "class" as "classbits" to make life easier for C++
    users. (Previously there was a macro definition, but it apparently wasn't
    enough.)

12. Added the new field "tables" to the extra data so that tables can be passed
    in at exec time, or the internal tables can be re-selected. This allows
    a compiled regex to be saved and re-used at a later time by a different
    program that might have everything at different addresses.

13. Modified the pcre-config script so that, when run on Solaris, it shows a
    -R library as well as a -L library.

14. The debugging options of pcretest (-d on the command line or D on a
    pattern) showed incorrect output for anything following an extended class
    that contained multibyte characters and which was followed by a quantifier.

15. Added optional support for general category Unicode character properties
    via the \p, \P, and \X escapes. Unicode property support implies UTF-8
    support. It adds about 90K to the size of the library. The meanings of the
    inbuilt class escapes such as \d and \s have NOT been changed.

16. Updated pcredemo.c to include calls to free() to release the memory for the
    compiled pattern.

17. The generated file chartables.c was being created in the source directory
    instead of in the building directory. This caused the build to fail if the
    source directory was different from the building directory, and was
    read-only.

18. Added some sample Win commands from Mark Tetrode into the NON-UNIX-USE
    file. No doubt somebody will tell me if they don't make sense... Also added
    Dan Mooney's comments about building on OpenVMS.

19. Added support for partial matching via the PCRE_PARTIAL option for
    pcre_exec() and the \P data escape in pcretest.

20. Extended pcretest with 3 new pattern features:

    (i)   A pattern option of the form ">rest-of-line" causes pcretest to
          write the compiled pattern to the file whose name is "rest-of-line".
          This is a straight binary dump of the data, with the saved pointer to
          the character tables forced to be NULL. The study data, if any, is
          written too. After writing, pcretest reads a new pattern.

    (ii)  If, instead of a pattern, "<rest-of-line" is given, pcretest reads a
          compiled pattern from the given file. There must not be any
          occurrences of "<" in the file name (pretty unlikely); if there are,
          pcretest will instead treat the initial "<" as a pattern delimiter.
          After reading in the pattern, pcretest goes on to read data lines as
          usual.

    (iii) The F pattern option causes pcretest to flip the bytes in the 32-bit
          and 16-bit fields in a compiled pattern, to simulate a pattern that
          was compiled on a host of opposite endianness.

21. The pcre_exec() function can now cope with patterns that were compiled on
    hosts of opposite endianness, with this restriction:

      As for any compiled expression that is saved and used later, the tables
      pointer field cannot be preserved; the extra_data field in the arguments
      to pcre_exec() should be used to pass in a tables address if a value
      other than the default internal tables was used at compile time.

22. Calling pcre_exec() with a negative value of the "ovecsize" parameter is
    now diagnosed as an error. Previously, most of the time, a negative number
    would have been treated as zero, but if in addition "ovector" was passed as
    NULL, a crash could occur.

23. Updated the files ltmain.sh, config.sub, config.guess, and aclocal.m4 with
    new versions from the libtool 1.5 distribution (the last one is a copy of
    a file called libtool.m4). This seems to have fixed the need to patch
    "configure" to support Darwin 1.3 (which I used to do). However, I still
    had to patch ltmain.sh to ensure that ${SED} is set (it isn't on my
    workstation).

24. Changed the PCRE licence to be the more standard "BSD" licence.


Version 4.5 01-Dec-03
---------------------

 1. There has been some re-arrangement of the code for the match() function so
    that it can be compiled in a version that does not call itself recursively.
    Instead, it keeps those local variables that need separate instances for
    each "recursion" in a frame on the heap, and gets/frees frames whenever it
    needs to "recurse". Keeping track of where control must go is done by means
    of setjmp/longjmp. The whole thing is implemented by a set of macros that
    hide most of the details from the main code, and operates only if
    NO_RECURSE is defined while compiling pcre.c. If PCRE is built using the
    "configure" mechanism, "--disable-stack-for-recursion" turns on this way of
    operating.

    To make it easier for callers to provide specially tailored get/free
    functions for this usage, two new functions, pcre_stack_malloc, and
    pcre_stack_free, are used. They are always called in strict stacking order,
    and the size of block requested is always the same.

    The PCRE_CONFIG_STACKRECURSE info parameter can be used to find out whether
    PCRE has been compiled to use the stack or the heap for recursion. The
    -C option of pcretest uses this to show which version is compiled.

    A new data escape \S, is added to pcretest; it causes the amounts of store
    obtained and freed by both kinds of malloc/free at match time to be added
    to the output.

 2. Changed the locale test to use "fr_FR" instead of "fr" because that's
    what's available on my current Linux desktop machine.

 3. When matching a UTF-8 string, the test for a valid string at the start has
    been extended. If start_offset is not zero, PCRE now checks that it points
    to a byte that is the start of a UTF-8 character. If not, it returns
    PCRE_ERROR_BADUTF8_OFFSET (-11). Note: the whole string is still checked;
    this is necessary because there may be backward assertions in the pattern.
    When matching the same subject several times, it may save resources to use
    PCRE_NO_UTF8_CHECK on all but the first call if the string is long.

 4. The code for checking the validity of UTF-8 strings has been tightened so
    that it rejects (a) strings containing 0xfe or 0xff bytes and (b) strings
    containing "overlong sequences".

 5. Fixed a bug (appearing twice) that I could not find any way of exploiting!
    I had written "if ((digitab[*p++] && chtab_digit) == 0)" where the "&&"
    should have been "&", but it just so happened that all the cases this let
    through by mistake were picked up later in the function.

 6. I had used a variable called "isblank" - this is a C99 function, causing
    some compilers to warn. To avoid this, I renamed it (as "blankclass").

 7. Cosmetic: (a) only output another newline at the end of pcretest if it is
    prompting; (b) run "./pcretest /dev/null" at the start of the test script
    so the version is shown; (c) stop "make test" echoing "./RunTest".

 8. Added patches from David Burgess to enable PCRE to run on EBCDIC systems.

 9. The prototype for memmove() for systems that don't have it was using
    size_t, but the inclusion of the header that defines size_t was later. I've
    moved the #includes for the C headers earlier to avoid this.

10. Added some adjustments to the code to make it easier to compile on certain
    special systems:

      (a) Some "const" qualifiers were missing.
      (b) Added the macro EXPORT before all exported functions; by default this
          is defined to be empty.
      (c) Changed the dftables auxiliary program (that builds chartables.c) so
          that it reads its output file name as an argument instead of writing
          to the standard output and assuming this can be redirected.

11. In UTF-8 mode, if a recursive reference (e.g. (?1)) followed a character
    class containing characters with values greater than 255, PCRE compilation
    went into a loop.

12. A recursive reference to a subpattern that was within another subpattern
    that had a minimum quantifier of zero caused PCRE to crash. For example,
    (x(y(?2))z)? provoked this bug with a subject that got as far as the
    recursion. If the recursively-called subpattern itself had a zero repeat,
    that was OK.

13. In pcretest, the buffer for reading a data line was set at 30K, but the
    buffer into which it was copied (for escape processing) was still set at
    1024, so long lines caused crashes.

14. A pattern such as /[ab]{1,3}+/ failed to compile, giving the error
    "internal error: code overflow...". This applied to any character class
    that was followed by a possessive quantifier.

15. Modified the Makefile to add libpcre.la as a prerequisite for
    libpcreposix.la because I was told this is needed for a parallel build to
    work.

16. If a pattern that contained .* following optional items at the start was
    studied, the wrong optimizing data was generated, leading to matching
    errors. For example, studying /[ab]*.*c/ concluded, erroneously, that any
    matching string must start with a or b or c. The correct conclusion for
    this pattern is that a match can start with any character.


Version 4.4 13-Aug-03
---------------------

 1. In UTF-8 mode, a character class containing characters with values between
    127 and 255 was not handled correctly if the compiled pattern was studied.
    In fixing this, I have also improved the studying algorithm for such
    classes (slightly).

 2. Three internal functions had redundant arguments passed to them. Removal
    might give a very teeny performance improvement.

 3. Documentation bug: the value of the capture_top field in a callout is *one
    more than* the number of the highest numbered captured substring.

 4. The Makefile linked pcretest and pcregrep with -lpcre, which could result
    in incorrectly linking with a previously installed version. They now link
    explicitly with libpcre.la.

 5. configure.in no longer needs to recognize Cygwin specially.

 6. A problem in pcre.in for Windows platforms is fixed.

 7. If a pattern was successfully studied, and the -d (or /D) flag was given to
    pcretest, it used to include the size of the study block as part of its
    output. Unfortunately, the structure contains a field that has a different
    size on different hardware architectures. This meant that the tests that
    showed this size failed. As the block is currently always of a fixed size,
    this information isn't actually particularly useful in pcretest output, so
    I have just removed it.

 8. Three pre-processor statements accidentally did not start in column 1.
    Sadly, there are *still* compilers around that complain, even though
    standard C has not required this for well over a decade. Sigh.

 9. In pcretest, the code for checking callouts passed small integers in the
    callout_data field, which is a void * field. However, some picky compilers
    complained about the casts involved for this on 64-bit systems. Now
    pcretest passes the address of the small integer instead, which should get
    rid of the warnings.

10. By default, when in UTF-8 mode, PCRE now checks for valid UTF-8 strings at
    both compile and run time, and gives an error if an invalid UTF-8 sequence
    is found. There is an option for disabling this check in cases where the
    string is known to be correct and/or the maximum performance is wanted.

11. In response to a bug report, I changed one line in Makefile.in from

        -Wl,--out-implib,.libs/lib@WIN_PREFIX@pcreposix.dll.a \
    to
        -Wl,--out-implib,.libs/@WIN_PREFIX@libpcreposix.dll.a \

    to look similar to other lines, but I have no way of telling whether this
    is the right thing to do, as I do not use Windows. No doubt I'll get told
    if it's wrong...


Version 4.3 21-May-03
---------------------

1. Two instances of @WIN_PREFIX@ omitted from the Windows targets in the
   Makefile.

2. Some refactoring to improve the quality of the code:

   (i)   The utf8_table... variables are now declared "const".

   (ii)  The code for \cx, which used the "case flipping" table to upper case
         lower case letters, now just subtracts 32. This is ASCII-specific,
         but the whole concept of \cx is ASCII-specific, so it seems
         reasonable.

   (iii) PCRE was using its character types table to recognize decimal and
         hexadecimal digits in the pattern. This is silly, because it handles
         only 0-9, a-f, and A-F, but the character types table is locale-
         specific, which means strange things might happen. A private
         table is now used for this - though it costs 256 bytes, a table is
         much faster than multiple explicit tests. Of course, the standard
         character types table is still used for matching digits in subject
         strings against \d.

   (iv)  Strictly, the identifier ESC_t is reserved by POSIX (all identifiers
         ending in _t are). So I've renamed it as ESC_tee.

3. The first argument for regexec() in the POSIX wrapper should have been
   defined as "const".

4. Changed pcretest to use malloc() for its buffers so that they can be
   Electric Fenced for debugging.

5. There were several places in the code where, in UTF-8 mode, PCRE would try
   to read one or more bytes before the start of the subject string. Often this
   had no effect on PCRE's behaviour, but in some circumstances it could
   provoke a segmentation fault.

6. A lookbehind at the start of a pattern in UTF-8 mode could also cause PCRE
   to try to read one or more bytes before the start of the subject string.

7. A lookbehind in a pattern matched in non-UTF-8 mode on a PCRE compiled with
   UTF-8 support could misbehave in various ways if the subject string
   contained bytes with the 0x80 bit set and the 0x40 bit unset in a lookbehind
   area. (PCRE was not checking for the UTF-8 mode flag, and trying to move
   back over UTF-8 characters.)


Version 4.2 14-Apr-03
---------------------

1. Typo "#if SUPPORT_UTF8" instead of "#ifdef SUPPORT_UTF8" fixed.

2. Changes to the building process, supplied by Ronald Landheer-Cieslak
     [ON_WINDOWS]: new variable, "#" on non-Windows platforms
     [NOT_ON_WINDOWS]: new variable, "#" on Windows platforms
     [WIN_PREFIX]: new variable, "cyg" for Cygwin
     * Makefile.in: use autoconf substitution for OBJEXT, EXEEXT, BUILD_OBJEXT
       and BUILD_EXEEXT
     Note: automatic setting of the BUILD variables is not yet working
     set CPPFLAGS and BUILD_CPPFLAGS (but don't use yet) - should be used at
       compile-time but not at link-time
     [LINK]: use for linking executables only
     make different versions for Windows and non-Windows
     [LINKLIB]: new variable, copy of UNIX-style LINK, used for linking
       libraries
     [LINK_FOR_BUILD]: new variable
     [OBJEXT]: use throughout
     [EXEEXT]: use throughout
     <winshared>: new target
     <wininstall>: new target
     <dftables.o>: use native compiler
     <dftables>: use native linker
     <install>: handle Windows platform correctly
     <clean>: ditto
     <check>: ditto
     copy DLL to top builddir before testing

   As part of these changes, -no-undefined was removed again. This was reported
   to give trouble on HP-UX 11.0, so getting rid of it seems like a good idea
   in any case.

3. Some tidies to get rid of compiler warnings:

   . In the match_data structure, match_limit was an unsigned long int, whereas
     match_call_count was an int. I've made them both unsigned long ints.

   . In pcretest the fact that a const uschar * doesn't automatically cast to
     a void * provoked a warning.

   . Turning on some more compiler warnings threw up some "shadow" variables
     and a few more missing casts.

4. If PCRE was compiled with UTF-8 support, but called without the PCRE_UTF8
   option, a class that contained a single character with a value between 128
   and 255 (e.g. /[\xFF]/) caused PCRE to crash.

5. If PCRE was compiled with UTF-8 support, but called without the PCRE_UTF8
   option, a class that contained several characters, but with at least one
   whose value was between 128 and 255 caused PCRE to crash.


Version 4.1 12-Mar-03
---------------------

1. Compiling with gcc -pedantic found a couple of places where casts were
needed, and a string in dftables.c that was longer than standard compilers are
required to support.

2. Compiling with Sun's compiler found a few more places where the code could
be tidied up in order to avoid warnings.

3. The variables for cross-compiling were called HOST_CC and HOST_CFLAGS; the
first of these names is deprecated in the latest Autoconf in favour of the name
CC_FOR_BUILD, because "host" is typically used to mean the system on which the
compiled code will be run. I can't find a reference for HOST_CFLAGS, but by
analogy I have changed it to CFLAGS_FOR_BUILD.

4. Added -no-undefined to the linking command in the Makefile, because this is
apparently helpful for Windows. To make it work, also added "-L. -lpcre" to the
linking step for the pcreposix library.

5. PCRE was failing to diagnose the case of two named groups with the same
name.

6. A problem with one of PCRE's optimizations was discovered. PCRE remembers a
literal character that is needed in the subject for a match, and scans along to
ensure that it is present before embarking on the full matching process. This
saves time in cases of nested unlimited repeats that are never going to match.
Problem: the scan can take a lot of time if the subject is very long (e.g.
megabytes), thus penalizing straightforward matches. It is now done only if the
amount of subject to be scanned is less than 1000 bytes.

7. A lesser problem with the same optimization is that it was recording the
first character of an anchored pattern as "needed", thus provoking a search
right along the subject, even when the first match of the pattern was going to
fail. The "needed" character is now not set for anchored patterns, unless it
follows something in the pattern that is of non-fixed length. Thus, it still
fulfils its original purpose of finding quick non-matches in cases of nested
unlimited repeats, but isn't used for simple anchored patterns such as /^abc/.


Version 4.0 17-Feb-03
---------------------

1. If a comment in an extended regex that started immediately after a meta-item
extended to the end of string, PCRE compiled incorrect data. This could lead to
all kinds of weird effects. Example: /#/ was bad; /()#/ was bad; /a#/ was not.

2. Moved to autoconf 2.53 and libtool 1.4.2.

3. Perl 5.8 no longer needs "use utf8" for doing UTF-8 things. Consequently,
the special perltest8 script is no longer needed - all the tests can be run
from a single perltest script.

4. From 5.004, Perl has not included the VT character (0x0b) in the set defined
by \s. It has now been removed in PCRE. This means it isn't recognized as
whitespace in /x regexes too, which is the same as Perl. Note that the POSIX
class [:space:] *does* include VT, thereby creating a mess.

5. Added the class [:blank:] (a GNU extension from Perl 5.8) to match only
space and tab.

6. Perl 5.005 was a long time ago. It's time to amalgamate the tests that use
its new features into the main test script, reducing the number of scripts.

7. Perl 5.8 has changed the meaning of patterns like /a(?i)b/. Earlier versions
were backward compatible, and made the (?i) apply to the whole pattern, as if
/i were given. Now it behaves more logically, and applies the option setting
only to what follows. PCRE has been changed to follow suit. However, if it
finds options settings right at the start of the pattern, it extracts them into
the global options, as before. Thus, they show up in the info data.

8. Added support for the \Q...\E escape sequence. Characters in between are
treated as literals. This is slightly different from Perl in that $ and @ are
also handled as literals inside the quotes. In Perl, they will cause variable
interpolation. Note the following examples:

    Pattern            PCRE matches      Perl matches

    \Qabc$xyz\E        abc$xyz           abc followed by the contents of $xyz
    \Qabc\$xyz\E       abc\$xyz          abc\$xyz
    \Qabc\E\$\Qxyz\E   abc$xyz           abc$xyz

For compatibility with Perl, \Q...\E sequences are recognized inside character
classes as well as outside them.

9. Re-organized 3 code statements in pcretest to avoid "overflow in
floating-point constant arithmetic" warnings from a Microsoft compiler. Added a
(size_t) cast to one statement in pcretest and one in pcreposix to avoid
signed/unsigned warnings.

10. SunOS4 doesn't have strtoul(). This was used only for unpicking the -o
option for pcretest, so I've replaced it by a simple function that does just
that job.

11. pcregrep was ending with code 0 instead of 2 for the commands "pcregrep" or
"pcregrep -".

12. Added "possessive quantifiers" ?+, *+, ++, and {,}+ which come from Sun's
Java package. This provides some syntactic sugar for simple cases of what my
documentation calls "once-only subpatterns". A pattern such as x*+ is the same
as (?>x*). In other words, if what is inside (?>...) is just a single repeated
item, you can use this simplified notation. Note that this only makes sense with
greedy quantifiers. Consequently, the use of the possessive quantifier forces
greediness, whatever the setting of the PCRE_UNGREEDY option.

13. A change of greediness default within a pattern was not taking effect at
the current level for patterns like /(b+(?U)a+)/. It did apply to parenthesized
subpatterns that followed. Patterns like /b+(?U)a+/ worked because the option
was abstracted outside.

14. PCRE now supports the \G assertion. It is true when the current matching
position is at the start point of the match. This differs from \A when the
starting offset is non-zero. Used with the /g option of pcretest (or similar
code), it works in the same way as it does for Perl's /g option. If all
alternatives of a regex begin with \G, the expression is anchored to the start
match position, and the "anchored" flag is set in the compiled expression.

15. Some bugs concerning the handling of certain option changes within patterns
have been fixed. These applied to options other than (?ims). For example,
"a(?x: b c )d" did not match "XabcdY" but did match "Xa b c dY". It should have
been the other way round. Some of this was related to change 7 above.

16. PCRE now gives errors for /[.x.]/ and /[=x=]/ as unsupported POSIX
features, as Perl does. Previously, PCRE gave the warnings only for /[[.x.]]/
and /[[=x=]]/. PCRE now also gives an error for /[:name:]/ because it supports
POSIX classes only within a class (e.g. /[[:alpha:]]/).

17. Added support for Perl's \C escape. This matches one byte, even in UTF8
mode. Unlike ".", it always matches newline, whatever the setting of
PCRE_DOTALL. However, PCRE does not permit \C to appear in lookbehind
assertions. Perl allows it, but it doesn't (in general) work because it can't
calculate the length of the lookbehind. At least, that's the case for Perl
5.8.0 - I've been told they are going to document that it doesn't work in
future.

18. Added an error diagnosis for escapes that PCRE does not support: these are
\L, \l, \N, \P, \p, \U, \u, and \X.

19. Although correctly diagnosing a missing ']' in a character class, PCRE was
reading past the end of the pattern in cases such as /[abcd/.

20. PCRE was getting more memory than necessary for patterns with classes that
contained both POSIX named classes and other characters, e.g. /[[:space:]abc]/.

21. Added some code, conditional on #ifdef VPCOMPAT, to make life easier for
compiling PCRE for use with Virtual Pascal.

22. Small fix to the Makefile to make it work properly if the build is done
outside the source tree.

23. Added a new extension: a condition to go with recursion. If a conditional
subpattern starts with (?(R) the "true" branch is used if recursion has
happened, whereas the "false" branch is used only at the top level.

24. When there was a very long string of literal characters (over 255 bytes
without UTF support, over 250 bytes with UTF support), the computation of how
much memory was required could be incorrect, leading to segfaults or other
strange effects.

25. PCRE was incorrectly assuming anchoring (either to start of subject or to
start of line for a non-DOTALL pattern) when a pattern started with (.*) and
there was a subsequent back reference to those brackets. This meant that, for
example, /(.*)\d+\1/ failed to match "abc123bc". Unfortunately, it isn't
possible to check for precisely this case. All we can do is abandon the
optimization if .* occurs inside capturing brackets when there are any back
references whatsoever. (See below for a better fix that came later.)

26. The handling of the optimization for finding the first character of a
non-anchored pattern, and for finding a character that is required later in the
match were failing in some cases. This didn't break the matching; it just
failed to optimize when it could. The way this is done has been re-implemented.

27. Fixed typo in error message for invalid (?R item (it said "(?p").

28. Added a new feature that provides some of the functionality that Perl
provides with (?{...}). The facility is termed a "callout". The way it is done
in PCRE is for the caller to provide an optional function, by setting
pcre_callout to its entry point. Like pcre_malloc and pcre_free, this is a
global variable. By default it is unset, which disables all calling out. To get
the function called, the regex must include (?C) at appropriate points. This
is, in fact, equivalent to (?C0), and any number <= 255 may be given with (?C).
This provides a means of identifying different callout points. When PCRE
reaches such a point in the regex, if pcre_callout has been set, the external
function is called. It is provided with data in a structure called
pcre_callout_block, which is defined in pcre.h. If the function returns 0,
matching continues; if it returns a non-zero value, the match at the current
point fails. However, backtracking will occur if possible. [This was changed
later and other features added - see item 49 below.]

29. pcretest is upgraded to test the callout functionality. It provides a
callout function that displays information. By default, it shows the start of
the match and the current position in the text. There are some new data escapes
to vary what happens:

    \C+         in addition, show current contents of captured substrings
    \C-         do not supply a callout function
    \C!n        return 1 when callout number n is reached
    \C!n!m      return 1 when callout number n is reached for the mth time

30. If pcregrep was called with the -l option and just a single file name, it
output "<stdin>" if a match was found, instead of the file name.

31. Improve the efficiency of the POSIX API to PCRE. If the number of capturing
slots is less than POSIX_MALLOC_THRESHOLD, use a block on the stack to pass to
pcre_exec(). This saves a malloc/free per call. The default value of
POSIX_MALLOC_THRESHOLD is 10; it can be changed by --with-posix-malloc-threshold
when configuring.

32. The default maximum size of a compiled pattern is 64K. There have been a
few cases of people hitting this limit. The code now uses macros to handle the
storing of links as offsets within the compiled pattern. It defaults to 2-byte
links, but this can be changed to 3 or 4 bytes by --with-link-size when
configuring. Tests 2 and 5 work only with 2-byte links because they output
debugging information about compiled patterns.

33. Internal code re-arrangements:

(a) Moved the debugging function for printing out a compiled regex into
    its own source file (printint.c) and used #include to pull it into
    pcretest.c and, when DEBUG is defined, into pcre.c, instead of having two
    separate copies.

(b) Defined the list of op-code names for debugging as a macro in
    internal.h so that it is next to the definition of the opcodes.

(c) Defined a table of op-code lengths for simpler skipping along compiled
    code. This is again a macro in internal.h so that it is next to the
    definition of the opcodes.

34. Added support for recursive calls to individual subpatterns, along the
lines of Robin Houston's patch (but implemented somewhat differently).

35. Further mods to the Makefile to help Win32. Also, added code to pcregrep to
allow it to read and process whole directories in Win32. This code was
contributed by Lionel Fourquaux; it has not been tested by me.

36. Added support for named subpatterns. The Python syntax (?P<name>...) is
used to name a group. Names consist of alphanumerics and underscores, and must
be unique. Back references use the syntax (?P=name) and recursive calls use
(?P>name) which is a PCRE extension to the Python extension. Groups still have
numbers. The function pcre_fullinfo() can be used after compilation to extract
a name/number map. There are three relevant calls:

  PCRE_INFO_NAMEENTRYSIZE        yields the size of each entry in the map
  PCRE_INFO_NAMECOUNT            yields the number of entries
  PCRE_INFO_NAMETABLE            yields a pointer to the map.

The map is a vector of fixed-size entries. The size of each entry depends on
the length of the longest name used. The first two bytes of each entry are the
group number, most significant byte first. There follows the corresponding
name, zero terminated. The names are in alphabetical order.

37. Make the maximum literal string in the compiled code 250 for the non-UTF-8
case instead of 255. Making it the same both with and without UTF-8 support
means that the same test output works with both.

38. There was a case of malloc(0) in the POSIX testing code in pcretest. Avoid
calling malloc() with a zero argument.

39. Change 25 above had to resort to a heavy-handed test for the .* anchoring
optimization. I've improved things by keeping a bitmap of backreferences with
numbers 1-31 so that if .* occurs inside capturing brackets that are not in
fact referenced, the optimization can be applied. It is unlikely that a
relevant occurrence of .* (i.e. one which might indicate anchoring or forcing
the match to follow \n) will appear inside brackets with a number greater than
31, but if it does, any back reference > 31 suppresses the optimization.

40. Added a new compile-time option PCRE_NO_AUTO_CAPTURE. This has the effect
of disabling numbered capturing parentheses. Any opening parenthesis that is
not followed by ? behaves as if it were followed by ?: but named parentheses
can still be used for capturing (and they will acquire numbers in the usual
way).

41. Redesigned the return codes from the match() function into yes/no/error so
that errors can be passed back from deep inside the nested calls. A malloc
failure while inside a recursive subpattern call now causes the
PCRE_ERROR_NOMEMORY return instead of quietly going wrong.

42. It is now possible to set a limit on the number of times the match()
function is called in a call to pcre_exec(). This facility makes it possible to
limit the amount of recursion and backtracking, though not in a directly
obvious way, because the match() function is used in a number of different
circumstances. The count starts from zero for each position in the subject
string (for non-anchored patterns). The default limit is, for compatibility, a
large number, namely 10 000 000. You can change this in two ways:

(a) When configuring PCRE before making, you can use --with-match-limit=n
    to set a default value for the compiled library.

(b) For each call to pcre_exec(), you can pass a pcre_extra block in which
    a different value is set. See 45 below.

If the limit is exceeded, pcre_exec() returns PCRE_ERROR_MATCHLIMIT.

43. Added a new function pcre_config(int, void *) to enable run-time extraction
of things that can be changed at compile time. The first argument specifies
what is wanted and the second points to where the information is to be placed.
The current list of available information is:

  PCRE_CONFIG_UTF8

The output is an integer that is set to one if UTF-8 support is available;
otherwise it is set to zero.

  PCRE_CONFIG_NEWLINE

The output is an integer that is set to the value of the code that is used for
newline. It is either LF (10) or CR (13).

  PCRE_CONFIG_LINK_SIZE

The output is an integer that contains the number of bytes used for internal
linkage in compiled expressions. The value is 2, 3, or 4. See item 32 above.

  PCRE_CONFIG_POSIX_MALLOC_THRESHOLD

The output is an integer that contains the threshold above which the POSIX
interface uses malloc() for output vectors. See item 31 above.

  PCRE_CONFIG_MATCH_LIMIT

The output is an unsigned integer that contains the default limit of the number
of match() calls in a pcre_exec() execution. See 42 above.

44. pcretest has been upgraded by the addition of the -C option. This causes it
to extract all the available output from the new pcre_config() function, and to
output it. The program then exits immediately.

45. A need has arisen to pass over additional data with calls to pcre_exec() in
order to support additional features. One way would have been to define
pcre_exec2() (for example) with extra arguments, but this would not have been
extensible, and would also have required all calls to the original function to
be mapped to the new one. Instead, I have chosen to extend the mechanism that
is used for passing in "extra" data from pcre_study().

The pcre_extra structure is now exposed and defined in pcre.h. It currently
contains the following fields:

  flags         a bitmap indicating which of the following fields are set
  study_data    opaque data from pcre_study()
  match_limit   a way of specifying a limit on match() calls for a specific
                  call to pcre_exec()
  callout_data  data for callouts (see 49 below)

The flag bits are also defined in pcre.h, and are

  PCRE_EXTRA_STUDY_DATA
  PCRE_EXTRA_MATCH_LIMIT
  PCRE_EXTRA_CALLOUT_DATA

The pcre_study() function now returns one of these new pcre_extra blocks, with
the actual study data pointed to by the study_data field, and the
PCRE_EXTRA_STUDY_DATA flag set. This can be passed directly to pcre_exec() as
before. That is, this change is entirely upwards-compatible and requires no
change to existing code.

If you want to pass in additional data to pcre_exec(), you can either place it
in a pcre_extra block provided by pcre_study(), or create your own pcre_extra
block.

46. pcretest has been extended to test the PCRE_EXTRA_MATCH_LIMIT feature. If a
data string contains the escape sequence \M, pcretest calls pcre_exec() several
times with different match limits, until it finds the minimum value needed for
pcre_exec() to complete. The value is then output. This can be instructive; for
most simple matches the number is quite small, but for pathological cases it
gets very large very quickly.

47. There's a new option for pcre_fullinfo() called PCRE_INFO_STUDYSIZE. It
returns the size of the data block pointed to by the study_data field in a
pcre_extra block, that is, the value that was passed as the argument to
pcre_malloc() when PCRE was getting memory in which to place the information
created by pcre_study(). The fourth argument should point to a size_t variable.
pcretest has been extended so that this information is shown after a successful
pcre_study() call when information about the compiled regex is being displayed.

48. Cosmetic change to Makefile: there's no need to have / after $(DESTDIR)
because what follows is always an absolute path. (Later: it turns out that this
is more than cosmetic for MinGW, because it doesn't like empty path
components.)

49. Some changes have been made to the callout feature (see 28 above):

(i)  A callout function now has three choices for what it returns:

       0  =>  success, carry on matching
     > 0  =>  failure at this point, but backtrack if possible
     < 0  =>  serious error, return this value from pcre_exec()

     Negative values should normally be chosen from the set of PCRE_ERROR_xxx
     values. In particular, returning PCRE_ERROR_NOMATCH forces a standard
     "match failed" error. The error number PCRE_ERROR_CALLOUT is reserved for
     use by callout functions. It will never be used by PCRE itself.

(ii) The pcre_extra structure (see 45 above) has a void * field called
     callout_data, with corresponding flag bit PCRE_EXTRA_CALLOUT_DATA. The
     pcre_callout_block structure has a field of the same name. The contents of
     the field passed in the pcre_extra structure are passed to the callout
     function in the corresponding field in the callout block. This makes it
     easier to use the same callout-containing regex from multiple threads. For
     testing, the pcretest program has a new data escape

       \C*n        pass the number n (may be negative) as callout_data

     If the callout function in pcretest receives a non-zero value as
     callout_data, it returns that value.

50. Makefile wasn't handling CFLAGS properly when compiling dftables. Also,
there were some redundant $(CFLAGS) in commands that are now specified as
$(LINK), which already includes $(CFLAGS).

51. Extensions to UTF-8 support are listed below. These all apply when (a) PCRE
has been compiled with UTF-8 support *and* (b) pcre_compile() has been called
with the PCRE_UTF8 flag. Patterns that are compiled without that flag assume
one-byte characters throughout. Note that case-insensitive matching applies
only to characters whose values are less than 256. PCRE doesn't support the
notion of cases for higher-valued characters.

(i)   A character class whose characters are all within 0-255 is handled as
      a bit map, and the map is inverted for negative classes. Previously, a
      character > 255 always failed to match such a class; however it should
      match if the class was a negative one (e.g. [^ab]). This has been fixed.

(ii)  A negated character class with a single character < 255 is coded as
      "not this character" (OP_NOT). This wasn't working properly when the test
      character was multibyte, either singly or repeated.

(iii) Repeats of multibyte characters are now handled correctly in UTF-8
      mode, for example: \x{100}{2,3}.

(iv)  The character escapes \b, \B, \d, \D, \s, \S, \w, and \W (either
      singly or repeated) now correctly test multibyte characters. However,
      PCRE doesn't recognize any characters with values greater than 255 as
      digits, spaces, or word characters. Such characters always match \D, \S,
      and \W, and never match \d, \s, or \w.

(v)   Classes may now contain characters and character ranges with values
      greater than 255. For example: [ab\x{100}-\x{400}].

(vi)  pcregrep now has a --utf-8 option (synonym -u) which makes it call
      PCRE in UTF-8 mode.

52. The info request value PCRE_INFO_FIRSTCHAR has been renamed
PCRE_INFO_FIRSTBYTE because it is a byte value. However, the old name is
retained for backwards compatibility. (Note that LASTLITERAL is also a byte
value.)

53. The single man page has become too large. I have therefore split it up into
a number of separate man pages. These also give rise to individual HTML pages;
these are now put in a separate directory, and there is an index.html page that
lists them all. Some hyperlinking between the pages has been installed.

54. Added convenience functions for handling named capturing parentheses.

55. Unknown escapes inside character classes (e.g. [\M]) and escapes that
aren't interpreted therein (e.g. [\C]) are literals in Perl. This is now also
true in PCRE, except when the PCRE_EXTENDED option is set, in which case they
are faulted.

56. Introduced HOST_CC and HOST_CFLAGS which can be set in the environment when
calling configure. These values are used when compiling the dftables.c program
which is run to generate the source of the default character tables. They
default to the values of CC and CFLAGS. If you are cross-compiling PCRE,
you will need to set these values.

57. Updated the building process for Windows DLL, as provided by Fred Cox.


Version 3.9 02-Jan-02
---------------------

1. A bit of extraneous text had somehow crept into the pcregrep documentation.

2. If --disable-static was given, the building process failed when trying to
build pcretest and pcregrep. (For some reason it was using libtool to compile
them, which is not right, as they aren't part of the library.)


Version 3.8 18-Dec-01
---------------------

1. The experimental UTF-8 code was completely screwed up. It was packing the
bytes in the wrong order. How dumb can you get?


Version 3.7 29-Oct-01
---------------------

1. In updating pcretest to check change 1 of version 3.6, I screwed up.
This caused pcretest, when used on the test data, to segfault. Unfortunately,
this didn't happen under Solaris 8, where I normally test things.

2. The Makefile had to be changed to make it work on BSD systems, where 'make'
doesn't seem to recognize that ./xxx and xxx are the same file. (This entry
isn't in ChangeLog distributed with 3.7 because I forgot when I hastily made
this fix an hour or so after the initial 3.7 release.)


Version 3.6 23-Oct-01
---------------------

1. Crashed with /(sens|respons)e and \1ibility/ and "sense and sensibility" if
offsets passed as NULL with zero offset count.

2. The config.guess and config.sub files had not been updated when I moved to
the latest autoconf.


Version 3.5 15-Aug-01
---------------------

1. Added some missing #if !defined NOPOSIX conditionals in pcretest.c that
had been forgotten.

2. By using declared but undefined structures, we can avoid using "void"
definitions in pcre.h while keeping the internal definitions of the structures
private.

3. The distribution is now built using autoconf 2.50 and libtool 1.4. From a
user point of view, this means that both static and shared libraries are built
by default, but this can be individually controlled. More of the work of
handling these static/shared cases is now inside libtool instead of PCRE's make
file.

4. The pcretest utility is now installed along with pcregrep because it is
useful for users (to test regexs) and by doing this, it automatically gets
relinked by libtool. The documentation has been turned into a man page, so
there are now .1, .txt, and .html versions in /doc.

5. Upgrades to pcregrep:
   (i)   Added long-form option names like gnu grep.
   (ii)  Added --help to list all options with an explanatory phrase.
   (iii) Added -r, --recursive to recurse into sub-directories.
   (iv)  Added -f, --file to read patterns from a file.

6. pcre_exec() was referring to its "code" argument before testing that
argument for NULL (and giving an error if it was NULL).

7. Upgraded Makefile.in to allow for compiling in a different directory from
the source directory.

8. Tiny buglet in pcretest: when pcre_fullinfo() was called to retrieve the
options bits, the pointer it was passed was to an int instead of to an unsigned
long int. This mattered only on 64-bit systems.

9. Fixed typo (3.4/1) in pcre.h again. Sigh. I had changed pcre.h (which is
generated) instead of pcre.in, which is its source. Also made the same change
in several of the .c files.

10. A new release of gcc defines printf() as a macro, which broke pcretest
because it had an ifdef in the middle of a string argument for printf(). Fixed
by using separate calls to printf().

11. Added --enable-newline-is-cr and --enable-newline-is-lf to the configure
script, to force use of CR or LF instead of \n in the source. On non-Unix
systems, the value can be set in config.h.

12. The limit of 200 on non-capturing parentheses is a _nesting_ limit, not an
absolute limit. Changed the text of the error message to make this clear, and
likewise updated the man page.

13. The limit of 99 on the number of capturing subpatterns has been removed.
The new limit is 65535, which I hope will not be a "real" limit.


Version 3.4 22-Aug-00
---------------------

1. Fixed typo in pcre.h: unsigned const char * changed to const unsigned char *.

2. Diagnose condition (?(0) as an error instead of crashing on matching.


Version 3.3 01-Aug-00
---------------------

1. If an octal character was given, but the value was greater than \377, it
was not getting masked to the least significant bits, as documented. This could
lead to crashes in some systems.

2. Perl 5.6 (if not earlier versions) accepts classes like [a-\d] and treats
the hyphen as a literal. PCRE used to give an error; it now behaves like Perl.

3. Added the functions pcre_free_substring() and pcre_free_substring_list().
These just pass their arguments on to (pcre_free)(), but they are provided
because some uses of PCRE bind it to non-C systems that can call its functions,
but cannot call free() or pcre_free() directly.

4. Add "make test" as a synonym for "make check". Corrected some comments in
the Makefile.

5. Add $(DESTDIR)/ in front of all the paths in the "install" target in the
Makefile.

6. Changed the name of pgrep to pcregrep, because Solaris has introduced a
command called pgrep for grepping around the active processes.

7. Added the beginnings of support for UTF-8 character strings.

8. Arranged for the Makefile to pass over the settings of CC, CFLAGS, and
RANLIB to ./ltconfig so that they are used by libtool. I think these are all
the relevant ones. (AR is not passed because ./ltconfig does its own figuring
out for the ar command.)


Version 3.2 12-May-00
---------------------

This is purely a bug fixing release.

1. If the pattern /((Z)+|A)*/ was matched against ZABCDEFG it matched Z instead
of ZA. This was just one example of several cases that could provoke this bug,
which was introduced by change 9 of version 2.00. The code for breaking
infinite loops after an iteration that matches an empty string wasn't working
correctly.

2. The pcretest program was not imitating Perl correctly for the pattern /a*/g
when matched against abbab (for example). After matching an empty string, it
wasn't forcing anchoring when setting PCRE_NOTEMPTY for the next attempt; this
caused it to match further down the string than it should.

3. The code contained an inclusion of sys/types.h. It isn't clear why this
was there because it doesn't seem to be needed, and it causes trouble on some
systems, as it is not a Standard C header. It has been removed.

4. Made 4 silly changes to the source to avoid stupid compiler warnings that
were reported on the Macintosh. The changes were from

  while ((c = *(++ptr)) != 0 && c != '\n');
to
  while ((c = *(++ptr)) != 0 && c != '\n') ;

Totally extraordinary, but if that's what it takes...

5. PCRE is being used in one environment where neither memmove() nor bcopy() is
available. Added HAVE_BCOPY and an autoconf test for it; if neither
HAVE_MEMMOVE nor HAVE_BCOPY is set, use a built-in emulation function which
assumes the way PCRE uses memmove() (always moving upwards).

6. PCRE is being used in one environment where strchr() is not available. There
was only one use in pcre.c, and writing it out to avoid strchr() probably gives
faster code anyway.


Version 3.1 09-Feb-00
---------------------

The only change in this release is the fixing of some bugs in Makefile.in for
the "install" target:

(1) It was failing to install pcreposix.h.

(2) It was overwriting the pcre.3 man page with the pcreposix.3 man page.


Version 3.0 01-Feb-00
---------------------

1. Add support for the /+ modifier to perltest (to output $` like it does in
pcretest).

2. Add support for the /g modifier to perltest.

3. Fix pcretest so that it behaves even more like Perl for /g when the pattern
matches null strings.

4. Fix perltest so that it doesn't do unwanted things when fed an empty
pattern. Perl treats empty patterns specially - it reuses the most recent
pattern, which is not what we want. Replace // by /(?#)/ in order to avoid this
effect.

5. The POSIX interface was broken in that it was just handing over the POSIX
captured string vector to pcre_exec(), but (since release 2.00) PCRE has
required a bigger vector, with some working space on the end. This means that
the POSIX wrapper now has to get and free some memory, and copy the results.

6. Added some simple autoconf support, placing the test data and the
documentation in separate directories, re-organizing some of the
information files, and making it build pcre-config (a GNU standard). Also added
libtool support for building PCRE as a shared library, which is now the
default.

7. Got rid of the leading zero in the definition of PCRE_MINOR because 08 and
09 are not valid octal constants. Single digits will be used for minor values
less than 10.

8. Defined REG_EXTENDED and REG_NOSUB as zero in the POSIX header, so that
existing programs that set these in the POSIX interface can use PCRE without
modification.

9. Added a new function, pcre_fullinfo() with an extensible interface. It can
return all that pcre_info() returns, plus additional data. The pcre_info()
function is retained for compatibility, but is considered to be obsolete.

10. Added experimental recursion feature (?R) to handle one common case that
Perl 5.6 will be able to do with (?p{...}).

11. Added support for POSIX character classes like [:alpha:], which Perl is
adopting.


Version 2.08 31-Aug-99
----------------------

1. When startoffset was not zero and the pattern began with ".*", PCRE was not
trying to match at the startoffset position, but instead was moving forward to
the next newline as if a previous match had failed.

2. pcretest was not making use of PCRE_NOTEMPTY when repeating for /g and /G,
and could get into a loop if a null string was matched other than at the start
of the subject.

3. Added definitions of PCRE_MAJOR and PCRE_MINOR to pcre.h so the version can
be distinguished at compile time, and for completeness also added PCRE_DATE.

5. Added Paul Sokolovsky's minor changes to make it easy to compile a Win32 DLL
in GnuWin32 environments.


Version 2.07 29-Jul-99
----------------------

1. The documentation is now supplied in plain text form and HTML as well as in
the form of man page sources.

2. C++ compilers don't like assigning (void *) values to other pointer types.
In particular this affects malloc(). Although there is no problem in Standard
C, I've put in casts to keep C++ compilers happy.

3. Typo in pcretest.c; a cast of (unsigned char *) in the POSIX regexec() call
should be (const char *).

4. If NOPOSIX is defined, pcretest.c compiles without POSIX support. This may
be useful for non-Unix systems who don't want to bother with the POSIX stuff.
However, I haven't made this a standard facility. The documentation doesn't
mention it, and the Makefile doesn't support it.

5. The Makefile now contains an "install" target, with editable destinations at
the top of the file. The pcretest program is not installed.

6. pgrep -V now gives the PCRE version number and date.

7. Fixed bug: a zero repetition after a literal string (e.g. /abcde{0}/) was
causing the entire string to be ignored, instead of just the last character.

8. If a pattern like /"([^\\"]+|\\.)*"/ is applied in the normal way to a
non-matching string, it can take a very, very long time, even for strings of
quite modest length, because of the nested recursion. PCRE now does better in
some of these cases. It does this by remembering the last required literal
character in the pattern, and pre-searching the subject to ensure it is present
before running the real match. In other words, it applies a heuristic to detect
some types of certain failure quickly, and in the above example, if presented
with a string that has no trailing " it gives "no match" very quickly.

9. A new runtime option PCRE_NOTEMPTY causes null string matches to be ignored;
other alternatives are tried instead.


Version 2.06 09-Jun-99
----------------------

1. Change pcretest's output for amount of store used to show just the code
space, because the remainder (the data block) varies in size between 32-bit and
64-bit systems.

2. Added an extra argument to pcre_exec() to supply an offset in the subject to
start matching at. This allows lookbehinds to work when searching for multiple
occurrences in a string.

3. Added additional options to pcretest for testing multiple occurrences:

   /+   outputs the rest of the string that follows a match
   /g   loops for multiple occurrences, using the new startoffset argument
   /G   loops for multiple occurrences by passing an incremented pointer

4. PCRE wasn't doing the "first character" optimization for patterns starting
with \b or \B, though it was doing it for other lookbehind assertions. That is,
it wasn't noticing that a match for a pattern such as /\bxyz/ has to start with
the letter 'x'. On long subject strings, this gives a significant speed-up.


Version 2.05 21-Apr-99
----------------------

1. Changed the type of magic_number from int to long int so that it works
properly on 16-bit systems.

2. Fixed a bug which caused patterns starting with .* not to work correctly
when the subject string contained newline characters. PCRE was assuming
anchoring for such patterns in all cases, which is not correct because .* will
not pass a newline unless PCRE_DOTALL is set. It now assumes anchoring only if
DOTALL is set at top level; otherwise it knows that patterns starting with .*
must be retried after every newline in the subject.


Version 2.04 18-Feb-99
----------------------

1. For parenthesized subpatterns with repeats whose minimum was zero, the
computation of the store needed to hold the pattern was incorrect (too large).
If such patterns were nested a few deep, this could multiply and become a real
problem.

2. Added /M option to pcretest to show the memory requirement of a specific
pattern. Made -m a synonym of -s (which does this globally) for compatibility.

3. Subpatterns of the form (regex){n,m} (i.e. limited maximum) were being
compiled in such a way that the backtracking after subsequent failure was
pessimal. Something like (a){0,3} was compiled as (a)?(a)?(a)? instead of
((a)((a)(a)?)?)? with disastrous performance if the maximum was of any size.


Version 2.03 02-Feb-99
----------------------

1. Fixed typo and small mistake in man page.

2. Added 4th condition (GPL supersedes if conflict) and created separate
LICENCE file containing the conditions.

3. Updated pcretest so that patterns such as /abc\/def/ work like they do in
Perl, that is the internal \ allows the delimiter to be included in the
pattern. Locked out the use of \ as a delimiter. If \ immediately follows
the final delimiter, add \ to the end of the pattern (to test the error).

4. Added the convenience functions for extracting substrings after a successful
match. Updated pcretest to make it able to test these functions.


Version 2.02 14-Jan-99
----------------------

1. Initialized the working variables associated with each extraction so that
their saving and restoring doesn't refer to uninitialized store.

2. Put dummy code into study.c in order to trick the optimizer of the IBM C
compiler for OS/2 into generating correct code. Apparently IBM isn't going to
fix the problem.

3. Pcretest: the timing code wasn't using LOOPREPEAT for timing execution
calls, and wasn't printing the correct value for compiling calls. Increased the
default value of LOOPREPEAT, and the number of significant figures in the
times.

4. Changed "/bin/rm" in the Makefile to "-rm" so it works on Windows NT.

5. Renamed "deftables" as "dftables" to get it down to 8 characters, to avoid
a building problem on Windows NT with a FAT file system.


Version 2.01 21-Oct-98
----------------------

1. Changed the API for pcre_compile() to allow for the provision of a pointer
to character tables built by pcre_maketables() in the current locale. If NULL
is passed, the default tables are used.


Version 2.00 24-Sep-98
----------------------

1. Since the (?>) facility is in Perl 5.005, don't require PCRE_EXTRA to enable
it any more.

2. Allow quantification of (?>) groups, and make it work correctly.

3. The first character computation wasn't working for (?>) groups.

4. Correct the implementation of \Z (it is permitted to match on the \n at the
end of the subject) and add 5.005's \z, which really does match only at the
very end of the subject.

5. Remove the \X "cut" facility; Perl doesn't have it, and (?> is neater.

6. Remove the ability to specify CASELESS, MULTILINE, DOTALL, and
DOLLAR_END_ONLY at runtime, to make it possible to implement the Perl 5.005
localized options. All options to pcre_study() were also removed.

7. Add other new features from 5.005:

   (?<=            positive lookbehind
   (?<!            negative lookbehind
   (?imsx-imsx)    added the unsetting capability
                   such a setting is global if at outer level; local otherwise
   (?imsx-imsx:)   non-capturing groups with option setting
   (?(cond)re|re)  conditional pattern matching

   A backreference to itself in a repeated group matches the previous
   captured string.

8. General tidying up of studying (both automatic and via "study")
consequential on the addition of new assertions.

9. As in 5.005, unlimited repeated groups that could match an empty substring
are no longer faulted at compile time. Instead, the loop is forcibly broken at
runtime if any iteration does actually match an empty substring.

10. Include the RunTest script in the distribution.

11. Added tests from the Perl 5.005_02 distribution. This showed up a few
discrepancies, some of which were old and were also with respect to 5.004. They
have now been fixed.


Version 1.09 28-Apr-98
----------------------

1. A negated single character class followed by a quantifier with a minimum
value of one (e.g.  [^x]{1,6}  ) was not compiled correctly. This could lead to
program crashes, or just wrong answers. This did not apply to negated classes
containing more than one character, or to minima other than one.


Version 1.08 27-Mar-98
----------------------

1. Add PCRE_UNGREEDY to invert the greediness of quantifiers.

2. Add (?U) and (?X) to set PCRE_UNGREEDY and PCRE_EXTRA respectively. The
latter must appear before anything that relies on it in the pattern.


Version 1.07 16-Feb-98
----------------------

1. A pattern such as /((a)*)*/ was not being diagnosed as in error (unlimited
repeat of a potentially empty string).


Version 1.06 23-Jan-98
----------------------

1. Added Markus Oberhumer's little patches for C++.

2. Literal strings longer than 255 characters were broken.


Version 1.05 23-Dec-97
----------------------

1. Negated character classes containing more than one character were failing if
PCRE_CASELESS was set at run time.


Version 1.04 19-Dec-97
----------------------

1. Corrected the man page, where some "const" qualifiers had been omitted.

2. Made debugging output print "{0,xxx}" instead of just "{,xxx}" to agree with
input syntax.

3. Fixed memory leak which occurred when a regex with back references was
matched with an offsets vector that wasn't big enough. The temporary memory
that is used in this case wasn't being freed if the match failed.

4. Tidied pcretest to ensure it frees memory that it gets.

5. Temporary memory was being obtained in the case where the passed offsets
vector was exactly big enough.

6. Corrected definition of offsetof() from change 5 below.

7. I had screwed up change 6 below and broken the rules for the use of
setjmp(). Now fixed.


Version 1.03 18-Dec-97
----------------------

1. An erroneous regex with a missing opening parenthesis was correctly
diagnosed, but PCRE attempted to access brastack[-1], which could cause crashes
on some systems.

2. Replaced offsetof(real_pcre, code) by offsetof(real_pcre, code[0]) because
it was reported that one broken compiler failed on the former because "code" is
also an independent variable.

3. The erroneous regex a[]b caused an array overrun reference.

4. A regex ending with a one-character negative class (e.g. /[^k]$/) did not
fail on data ending with that character. (It was going on too far, and checking
the next character, typically a binary zero.) This was specific to the
optimized code for single-character negative classes.

5. Added a contributed patch from the TIN world which does the following:

  + Add an undef for memmove, in case the system defines a macro for it.

  + Add a definition of offsetof(), in case there isn't one. (I don't know
    the reason behind this - offsetof() is part of the ANSI standard - but
    it does no harm).

  + Reduce the ifdef's in pcre.c using macro DPRINTF, thereby eliminating
    most of the places where whitespace preceded '#'. I have given up and
    allowed the remaining 2 cases to be at the margin.

  + Rename some variables in pcre to eliminate shadowing. This seems very
    pedantic, but does no harm, of course.

6. Moved the call to setjmp() into its own function, to get rid of warnings
from gcc -Wall, and avoided calling it at all unless PCRE_EXTRA is used.

7. Constructs such as \d{8,} were compiling into the equivalent of
\d{8}\d{0,65527} instead of \d{8}\d* which didn't make much difference to the
outcome, but in this particular case used more store than had been allocated,
which caused the bug to be discovered because it threw up an internal error.

8. The debugging code in both pcre and pcretest for outputting the compiled
form of a regex was going wrong in the case of back references followed by
curly-bracketed repeats.


Version 1.02 12-Dec-97
----------------------

1. Typos in pcre.3 and comments in the source fixed.

2. Applied a contributed patch to get rid of places where it used to remove
'const' from variables, and fixed some signed/unsigned and uninitialized
variable warnings.

3. Added the "runtest" target to Makefile.

4. Set default compiler flag to -O2 rather than just -O.


Version 1.01 19-Nov-97
----------------------

1. PCRE was failing to diagnose unlimited repeat of empty string for patterns
like /([ab]*)*/, that is, for classes with more than one character in them.

2. Likewise, it wasn't diagnosing patterns with "once-only" subpatterns, such
as /((?>a*))*/ (a PCRE_EXTRA facility).


Version 1.00 18-Nov-97
----------------------

1. Added compile-time macros to support systems such as SunOS4 which don't have
memmove() or strerror() but have other things that can be used instead.

2. Arranged that "make clean" removes the executables.


Version 0.99 27-Oct-97
----------------------

1. Fixed bug in code for optimizing classes with only one character. It was
initializing a 32-byte map regardless, which could cause it to run off the end
of the memory it had got.

2. Added, conditional on PCRE_EXTRA, the proposed (?>REGEX) construction.


Version 0.98 22-Oct-97
----------------------

1. Fixed bug in code for handling temporary memory usage when there are more
back references than supplied space in the ovector. This could cause segfaults.


Version 0.97 21-Oct-97
----------------------

1. Added the \X "cut" facility, conditional on PCRE_EXTRA.

2. Optimized negated single characters not to use a bit map.

3. Brought error texts together as macro definitions; clarified some of them;
fixed one that was wrong - it said "range out of order" when it meant "invalid
escape sequence".

4. Changed some char * arguments to const char *.

5. Added PCRE_NOTBOL and PCRE_NOTEOL (from POSIX).

6. Added the POSIX-style API wrapper in pcreposix.a and testing facilities in
pcretest.


Version 0.96 16-Oct-97
----------------------

1. Added a simple "pgrep" utility to the distribution.

2. Fixed an incompatibility with Perl: "{" is now treated as a normal character
unless it appears in one of the precise forms "{ddd}", "{ddd,}", or "{ddd,ddd}"
where "ddd" means "one or more decimal digits".

3. Fixed serious bug. If a pattern had a back reference, but the call to
pcre_exec() didn't supply a large enough ovector to record the related
identifying subpattern, the match always failed. PCRE now remembers the number
of the largest back reference, and gets some temporary memory in which to save
the offsets during matching if necessary, in order to ensure that
backreferences always work.

4. Increased the compatibility with Perl in a number of ways:

  (a) . no longer matches \n by default; an option PCRE_DOTALL is provided
      to request this handling. The option can be set at compile or exec time.

  (b) $ matches before a terminating newline by default; an option
      PCRE_DOLLAR_ENDONLY is provided to override this (but not in multiline
      mode). The option can be set at compile or exec time.

  (c) The handling of \ followed by a digit other than 0 is now supposed to be
      the same as Perl's. If the decimal number it represents is less than 10
      or there aren't that many previous left capturing parentheses, an octal
      escape is read. Inside a character class, it's always an octal escape,
      even if it is a single digit.

  (d) An escaped but undefined alphabetic character is taken as a literal,
      unless PCRE_EXTRA is set. Currently this just reserves the remaining
      escapes.

  (e) {0} is now permitted. (The previous item is removed from the compiled
      pattern).

5. Changed all the names of code files so that the basic parts are no longer
than 10 characters, and abolished the teeny "globals.c" file.

6. Changed the handling of character classes; they are now done with a 32-byte
bit map always.

7. Added the -d and /D options to pcretest to make it possible to look at the
internals of compilation without having to recompile pcre.


Version 0.95 23-Sep-97
----------------------

1. Fixed bug in pre-pass concerning escaped "normal" characters such as \x5c or
\x20 at the start of a run of normal characters. These were being treated as
real characters, instead of the source characters being re-checked.


Version 0.94 18-Sep-97
----------------------

1. The functions are now thread-safe, with the caveat that the global variables
containing pointers to malloc() and free() or alternative functions are the
same for all threads.

2. Get pcre_study() to generate a bitmap of initial characters for non-
anchored patterns when this is possible, and use it if passed to pcre_exec().


Version 0.93 15-Sep-97
----------------------

1. /(b)|(:+)/ was computing an incorrect first character.

2. Add pcre_study() to the API and the passing of pcre_extra to pcre_exec(),
but not actually doing anything yet.

3. Treat "-" characters in classes that cannot be part of ranges as literals,
as Perl does (e.g. [-az] or [az-]).

4. Set the anchored flag if a branch starts with .* or .*? because that tests
all possible positions.

5. Split up into different modules to avoid including unneeded functions in a
compiled binary. However, compile and exec are still in one module. The "study"
function is split off.

6. The character tables are now in a separate module whose source is generated
by an auxiliary program - but can then be edited by hand if required. There are
now no calls to isalnum(), isspace(), isdigit(), isxdigit(), tolower() or
toupper() in the code.

7. Turn the malloc/free function variables into pcre_malloc and pcre_free and
make them global. Abolish the function for setting them, as the caller can now
set them directly.


Version 0.92 11-Sep-97
----------------------

1. A repeat with a fixed maximum and a minimum of 1 for an ordinary character
(e.g. /a{1,3}/) was broken (I mis-optimized it).

2. Caseless matching was not working in character classes if the characters in
the pattern were in upper case.

3. Make ranges like [W-c] work in the same way as Perl for caseless matching.

4. Make PCRE_ANCHORED public and accept as a compile option.

5. Add an options word to pcre_exec() and accept PCRE_ANCHORED and
PCRE_CASELESS at run time. Add escapes \A and \I to pcretest to cause it to
pass them.

6. Give an error if bad option bits passed at compile or run time.

7. Add PCRE_MULTILINE at compile and exec time, and (?m) as well. Add \M to
pcretest to cause it to pass that flag.

8. Add pcre_info(), to get the number of identifying subpatterns, the stored
options, and the first character, if set.

9. Recognize C+ or C{n,m} where n >= 1 as providing a fixed starting character.


Version 0.91 10-Sep-97
----------------------

1. PCRE was failing to diagnose unlimited repeats of subpatterns that could
match the empty string as in /(a*)*/. It was looping and ultimately crashing.

2. PCRE was looping on encountering an indefinitely repeated back reference to
a subpattern that had matched an empty string, e.g. /(a|)\1*/. It now does what
Perl does - treats the match as successful.

****
usr/share/doc/alt-pcre802/NEWS000064400000043674150403561450011705 0ustar00News about PCRE releases
------------------------

Release 8.02 19-Mar-2010
------------------------

Another bug-fix release.


Release 8.01 19-Jan-2010
------------------------

This is a bug-fix release. Several bugs in the code itself and some bugs and
infelicities in the build system have been fixed.


Release 8.00 19-Oct-09
----------------------

Bugs have been fixed in the library and in pcregrep. There are also some
enhancements. Restrictions on patterns used for partial matching have been
removed, extra information is given for partial matches, the partial matching
process has been improved, and an option to make a partial match override a
full match is available. The "study" process has been enhanced by finding a
lower bound matching length. Groups with duplicate numbers may now have
duplicated names without the use of PCRE_DUPNAMES. However, they may not have
different names. The documentation has been revised to reflect these changes.
The version number has been expanded to 3 digits as it is clear that the rate
of change is not slowing down.


Release 7.9 11-Apr-09
---------------------

Mostly bugfixes and tidies with just a couple of minor functional additions.


Release 7.8 05-Sep-08
---------------------

More bug fixes, plus a performance improvement in Unicode character property
lookup.


Release 7.7 07-May-08
---------------------

This is once again mainly a bug-fix release, but there are a couple of new
features.


Release 7.6 28-Jan-08
---------------------

The main reason for having this release so soon after 7.5 is because it fixes a
potential buffer overflow problem in pcre_compile() when run in UTF-8 mode. In
addition, the CMake configuration files have been brought up to date.


Release 7.5 10-Jan-08
---------------------

This is mainly a bug-fix release. However the ability to link pcregrep with
libz or libbz2 and the ability to link pcretest with libreadline have been
added. Also the --line-offsets and --file-offsets options were added to
pcregrep.


Release 7.4 21-Sep-07
---------------------

The only change of specification is the addition of options to control whether
\R matches any Unicode line ending (the default) or just CR, LF, and CRLF.
Otherwise, the changes are bug fixes and a refactoring to reduce the number of
relocations needed in a shared library. There have also been some documentation
updates, in particular, some more information about using CMake to build PCRE
has been added to the NON-UNIX-USE file.


Release 7.3 28-Aug-07
---------------------

Most changes are bug fixes. Some that are not:

1. There is some support for Perl 5.10's experimental "backtracking control
   verbs" such as (*PRUNE).

2. UTF-8 checking is now as per RFC 3629 instead of RFC 2279; this is more
   restrictive in the strings it accepts.

3. Checking for potential integer overflow has been made more dynamic, and as a
   consequence there is no longer a hard limit on the size of a subpattern that
   has a limited repeat count.

4. When CRLF is a valid line-ending sequence, pcre_exec() and pcre_dfa_exec()
   no longer advance by two characters instead of one when an unanchored match
   fails at CRLF if there are explicit CR or LF matches within the pattern.
   This gets rid of some anomalous effects that previously occurred.

5. Some PCRE-specific settings for varying the newline options at the start of
   a pattern have been added.


Release 7.2 19-Jun-07
---------------------

WARNING: saved patterns that were compiled by earlier versions of PCRE must be
recompiled for use with 7.2 (necessitated by the addition of \K, \h, \H, \v,
and \V).

Correction to the notes for 7.1: the note about shared libraries for Windows is
wrong. Previously, three libraries were built, but each could function
independently. For example, the pcreposix library also included all the
functions from the basic pcre library. The change is that the three libraries
are no longer independent. They are like the Unix libraries. To use the
pcreposix functions, for example, you need to link with both the pcreposix and
the basic pcre library.

Some more features from Perl 5.10 have been added:

  (?-n) and (?+n) relative references for recursion and subroutines.

  (?(-n) and (?(+n) relative references as conditions.

  \k{name} and \g{name} are synonyms for \k<name>.

  \K to reset the start of the matched string; for example, (foo)\Kbar
  matches bar preceded by foo, but only sets bar as the matched string.

  (?| introduces a group where the capturing parentheses in each alternative
  start from the same number; for example, (?|(abc)|(xyz)) sets capturing
  parentheses number 1 in both cases.

  \h, \H, \v, \V match horizontal and vertical whitespace, respectively.


Release 7.1 24-Apr-07
---------------------

There is only one new feature in this release: a linebreak setting of
PCRE_NEWLINE_ANYCRLF. It is a cut-down version of PCRE_NEWLINE_ANY, which
recognizes only CRLF, CR, and LF as linebreaks.

A few bugs are fixed (see ChangeLog for details), but the major change is a
complete re-implementation of the build system. This now has full Autotools
support and so is now "standard" in some sense. It should help with compiling
PCRE in a wide variety of environments.

NOTE: when building shared libraries for Windows, three dlls are now built,
called libpcre, libpcreposix, and libpcrecpp. Previously, everything was
included in a single dll.

Another important change is that the dftables auxiliary program is no longer
compiled and run at "make" time by default. Instead, a default set of character
tables (assuming ASCII coding) is used. If you want to use dftables to generate
the character tables as previously, add --enable-rebuild-chartables to the
"configure" command. You must do this if you are compiling PCRE to run on a
system that uses EBCDIC code.

There is a discussion about character tables in the README file. The default is
not to use dftables so that there is no problem when cross-compiling.


Release 7.0 19-Dec-06
---------------------

This release has a new major number because there have been some internal
upheavals to facilitate the addition of new optimizations and other facilities,
and to make subsequent maintenance and extension easier. Compilation is likely
to be a bit slower, but there should be no major effect on runtime performance.
Previously compiled patterns are NOT upwards compatible with this release. If
you have saved compiled patterns from a previous release, you will have to
re-compile them. Important changes that are visible to users are:

1. The Unicode property tables have been updated to Unicode 5.0.0, which adds
   some more scripts.

2. The option PCRE_NEWLINE_ANY causes PCRE to recognize any Unicode newline
   sequence as a newline.

3. The \R escape matches a single Unicode newline sequence as a single unit.

4. New features that will appear in Perl 5.10 are now in PCRE. These include
   alternative Perl syntax for named parentheses, and Perl syntax for
   recursion.

5. The C++ wrapper interface has been extended by the addition of a
   QuoteMeta function and the ability to allow copy construction and
   assignment.

For a complete list of changes, see the ChangeLog file.


Release 6.7 04-Jul-06
---------------------

The main additions to this release are the ability to use the same name for
multiple sets of parentheses, and support for CRLF line endings in both the
library and pcregrep (and in pcretest for testing).

Thanks to Ian Taylor, the stack usage for many kinds of pattern has been
significantly reduced for certain subject strings.


Release 6.5 01-Feb-06
---------------------

Important changes in this release:

1. A number of new features have been added to pcregrep.

2. The Unicode property tables have been updated to Unicode 4.1.0, and the
   supported properties have been extended with script names such as "Arabic",
   and the derived properties "Any" and "L&". This has necessitated a change to
   the internal format of compiled patterns. Any saved compiled patterns that
   use \p or \P must be recompiled.

3. The specification of recursion in patterns has been changed so that all
   recursive subpatterns are automatically treated as atomic groups. Thus, for
   example, (?R) is treated as if it were (?>(?R)). This is necessary because
   otherwise there are situations where recursion does not work.

See the ChangeLog for a complete list of changes, which include a number of bug
fixes and tidies.


Release 6.0 07-Jun-05
---------------------

The release number has been increased to 6.0 because of the addition of several
major new pieces of functionality.

A new function, pcre_dfa_exec(), which implements pattern matching using a DFA
algorithm, has been added. This has a number of advantages for certain cases,
though it does run more slowly, and lacks the ability to capture substrings. On
the other hand, it does find all matches, not just the first, and it works
better for partial matching. The pcrematching man page discusses the
differences.

The pcretest program has been enhanced so that it can make use of the new
pcre_dfa_exec() matching function and the extra features it provides.

The distribution now includes a C++ wrapper library. This is built
automatically if a C++ compiler is found. The pcrecpp man page discusses this
interface.

The code itself has been re-organized into many more files, one for each
function, so it no longer requires everything to be linked in when static
linkage is used. As a consequence, some internal functions have had to have
their names exposed. These functions all have names starting with _pcre_. They
are undocumented, and are not intended for use by outside callers.

The pcregrep program has been enhanced with new functionality such as
multiline-matching and options for outputting more matching context. See the
ChangeLog for a complete list of changes to the library and the utility
programs.


Release 5.0 13-Sep-04
---------------------

The licence under which PCRE is released has been changed to the more
conventional "BSD" licence.

In the code, some bugs have been fixed, and there are also some major changes
in this release (which is why I've increased the number to 5.0). Some changes
are internal rearrangements, and some provide a number of new facilities. The
new features are:

1. There's an "automatic callout" feature that inserts callouts before every
   item in the regex, and there's a new callout field that gives the position
   in the pattern - useful for debugging and tracing.

2. The extra_data structure can now be used to pass in a set of character
   tables at exec time. This is useful if compiled regex are saved and re-used
   at a later time when the tables may not be at the same address. If the
   default internal tables are used, the pointer saved with the compiled
   pattern is now set to NULL, which means that you don't need to do anything
   special unless you are using custom tables.

3. It is possible, with some restrictions on the content of the regex, to
   request "partial" matching. A special return code is given if all of the
   subject string matched part of the regex. This could be useful for testing
   an input field as it is being typed.

4. There is now some optional support for Unicode character properties, which
   means that pattern items such as \p{Lu} and \X can now be used. Only
   the general category properties are supported. If PCRE is compiled with this
   support, an additional 90K data structure is included, which increases the
   size of the library dramatically.

5. There is support for saving compiled patterns and re-using them later.

6. There is support for running regular expressions that were compiled on a
   different host with the opposite endianness.

7. The pcretest program has been extended to accommodate the new features.

The main internal rearrangement is that sequences of literal characters are no
longer handled as strings. Instead, each character is handled on its own. This
makes some UTF-8 handling easier, and makes the support of partial matching
possible. Compiled patterns containing long literal strings will be larger as a
result of this change; I hope that performance will not be much affected.


Release 4.5 01-Dec-03
---------------------

Again mainly a bug-fix and tidying release, with only a couple of new features:

1. It's possible now to compile PCRE so that it does not use recursive
function calls when matching. Instead it gets memory from the heap. This slows
things down, but may be necessary on systems with limited stacks.

2. UTF-8 string checking has been tightened to reject overlong sequences and to
check that a starting offset points to the start of a character. Failure of the
latter returns a new error code: PCRE_ERROR_BADUTF8_OFFSET.

3. PCRE can now be compiled for systems that use EBCDIC code.


Release 4.4 21-Aug-03
---------------------

This is mainly a bug-fix and tidying release. The only new feature is that PCRE
checks UTF-8 strings for validity by default. There is an option to suppress
this, just in case anybody wants that teeny extra bit of performance.


Releases 4.1 - 4.3
------------------

Sorry, I forgot about updating the NEWS file for these releases. Please take a
look at ChangeLog.


Release 4.0 17-Feb-03
---------------------

There have been a lot of changes for the 4.0 release, adding additional
functionality and mending bugs. Below is a list of the highlights of the new
functionality. For full details of these features, please consult the
documentation. For a complete list of changes, see the ChangeLog file.

1. Support for Perl's \Q...\E escapes.

2. "Possessive quantifiers" ?+, *+, ++, and {,}+ which come from Sun's Java
package. They provide some syntactic sugar for simple cases of "atomic
grouping".

3. Support for the \G assertion. It is true when the current matching position
is at the start point of the match.

4. A new feature that provides some of the functionality that Perl provides
with (?{...}). The facility is termed a "callout". The way it is done in PCRE
is for the caller to provide an optional function, by setting pcre_callout to
its entry point. To get the function called, the regex must include (?C) at
appropriate points.

5. Support for recursive calls to individual subpatterns. This makes it really
easy to get totally confused.

6. Support for named subpatterns. The Python syntax (?P<name>...) is used to
name a group.

7. Several extensions to UTF-8 support; it is now fairly complete. There is an
option for pcregrep to make it operate in UTF-8 mode.

8. The single man page has been split into a number of separate man pages.
These also give rise to individual HTML pages which are put in a separate
directory. There is an index.html page that lists them all. Some hyperlinking
between the pages has been installed.


Release 3.5 15-Aug-01
---------------------

1. The configuring system has been upgraded to use later versions of autoconf
and libtool. By default it builds both a shared and a static library if the OS
supports it. You can use --disable-shared or --disable-static on the configure
command if you want only one of them.

2. The pcretest utility is now installed along with pcregrep because it is
useful for users (to test regexs) and by doing this, it automatically gets
relinked by libtool. The documentation has been turned into a man page, so
there are now .1, .txt, and .html versions in /doc.

3. Upgrades to pcregrep:
   (i)   Added long-form option names like gnu grep.
   (ii)  Added --help to list all options with an explanatory phrase.
   (iii) Added -r, --recursive to recurse into sub-directories.
   (iv)  Added -f, --file to read patterns from a file.

4. Added --enable-newline-is-cr and --enable-newline-is-lf to the configure
script, to force use of CR or LF instead of \n in the source. On non-Unix
systems, the value can be set in config.h.

5. The limit of 200 on non-capturing parentheses is a _nesting_ limit, not an
absolute limit. Changed the text of the error message to make this clear, and
likewise updated the man page.

6. The limit of 99 on the number of capturing subpatterns has been removed.
The new limit is 65535, which I hope will not be a "real" limit.


Release 3.3 01-Aug-00
---------------------

There is some support for UTF-8 character strings. This is incomplete and
experimental. The documentation describes what is and what is not implemented.
Otherwise, this is just a bug-fixing release.


Release 3.0 01-Feb-00
---------------------

1. A "configure" script is now used to configure PCRE for Unix systems. It
builds a Makefile, a config.h file, and the pcre-config script.

2. PCRE is built as a shared library by default.

3. There is support for POSIX classes such as [:alpha:].

5. There is an experimental recursion feature.

----------------------------------------------------------------------------
          IMPORTANT FOR THOSE UPGRADING FROM VERSIONS BEFORE 2.00

Please note that there has been a change in the API such that a larger
ovector is required at matching time, to provide some additional workspace.
The new man page has details. This change was necessary in order to support
some of the new functionality in Perl 5.005.

          IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.00

Another (I hope this is the last!) change has been made to the API for the
pcre_compile() function. An additional argument has been added to make it
possible to pass over a pointer to character tables built in the current
locale by pcre_maketables(). To use the default tables, this new argument
should be passed as NULL.

          IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.05

Yet another (and again I hope this really is the last) change has been made
to the API for the pcre_exec() function. An additional argument has been
added to make it possible to start the match other than at the start of the
subject string. This is important if there are lookbehinds. The new man
page has the details, but if you just want to convert existing programs, all
you need to do is to stick in a new fifth argument to pcre_exec(), with a
value of zero. For example, change

  pcre_exec(pattern, extra, subject, length, options, ovec, ovecsize)
to
  pcre_exec(pattern, extra, subject, length, 0, options, ovec, ovecsize)

****
usr/share/doc/alt-pcre802/AUTHORS000064400000000623150403561460012242 0ustar00THE MAIN PCRE LIBRARY
---------------------

Written by:       Philip Hazel
Email local part: ph10
Email domain:     cam.ac.uk

University of Cambridge Computing Service,
Cambridge, England.

Copyright (c) 1997-2010 University of Cambridge
All rights reserved


THE C++ WRAPPER LIBRARY
-----------------------

Written by:       Google Inc.

Copyright (c) 2007-2010 Google Inc
All rights reserved

####
usr/share/doc/alt-pcre802/COPYING000064400000000137150403561460012225 0ustar00PCRE LICENCE

Please see the file LICENCE in the PCRE distribution for licensing details.

End
usr/share/doc/alt-pcre802/LICENCE000064400000004700150403561460012157 0ustar00PCRE LICENCE
------------

PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

Release 8 of PCRE is distributed under the terms of the "BSD" licence, as
specified below. The documentation for PCRE, supplied in the "doc"
directory, is distributed under the same terms as the software itself.

The basic library functions are written in C and are freestanding. Also
included in the distribution is a set of C++ wrapper functions.


THE BASIC LIBRARY FUNCTIONS
---------------------------

Written by:       Philip Hazel
Email local part: ph10
Email domain:     cam.ac.uk

University of Cambridge Computing Service,
Cambridge, England.

Copyright (c) 1997-2010 University of Cambridge
All rights reserved.


THE C++ WRAPPER FUNCTIONS
-------------------------

Contributed by:   Google Inc.

Copyright (c) 2007-2010, Google Inc.
All rights reserved.


THE "BSD" LICENCE
-----------------

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the name of Google
      Inc. nor the names of their contributors may be used to endorse or
      promote products derived from this software without specific prior
      written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

End
usr/share/doc/alt-pcre802/README000064400000106626150403561460012064 0ustar00README file for PCRE (Perl-compatible regular expression library)
-----------------------------------------------------------------

The latest release of PCRE is always available in three alternative formats
from:

  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.gz
  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.tar.bz2
  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/pcre-xxx.zip

There is a mailing list for discussion about the development of PCRE at

  pcre-dev@exim.org

Please read the NEWS file if you are upgrading from a previous release.
The contents of this README file are:

  The PCRE APIs
  Documentation for PCRE
  Contributions by users of PCRE
  Building PCRE on non-Unix systems
  Building PCRE on Unix-like systems
  Retrieving configuration information on Unix-like systems
  Shared libraries on Unix-like systems
  Cross-compiling on Unix-like systems
  Using HP's ANSI C++ compiler (aCC)
  Using PCRE from MySQL
  Making new tarballs
  Testing PCRE
  Character tables
  File manifest


The PCRE APIs
-------------

PCRE is written in C, and it has its own API. The distribution also includes a
set of C++ wrapper functions (see the pcrecpp man page for details), courtesy
of Google Inc.

In addition, there is a set of C wrapper functions that are based on the POSIX
regular expression API (see the pcreposix man page). These end up in the
library called libpcreposix. Note that this just provides a POSIX calling
interface to PCRE; the regular expressions themselves still follow Perl syntax
and semantics. The POSIX API is restricted, and does not give full access to
all of PCRE's facilities.

The header file for the POSIX-style functions is called pcreposix.h. The
official POSIX name is regex.h, but I did not want to risk possible problems
with existing files of that name by distributing it that way. To use PCRE with
an existing program that uses the POSIX API, pcreposix.h will have to be
renamed or pointed at by a link.

If you are using the POSIX interface to PCRE and there is already a POSIX regex
library installed on your system, as well as worrying about the regex.h header
file (as mentioned above), you must also take care when linking programs to
ensure that they link with PCRE's libpcreposix library. Otherwise they may pick
up the POSIX functions of the same name from the other library.

One way of avoiding this confusion is to compile PCRE with the addition of
-Dregcomp=PCREregcomp (and similarly for the other POSIX functions) to the
compiler flags (CFLAGS if you are using "configure" -- see below). This has the
effect of renaming the functions so that the names no longer clash. Of course,
you have to do the same thing for your applications, or write them using the
new names.


Documentation for PCRE
----------------------

If you install PCRE in the normal way on a Unix-like system, you will end up
with a set of man pages whose names all start with "pcre". The one that is just
called "pcre" lists all the others. In addition to these man pages, the PCRE
documentation is supplied in two other forms:

  1. There are files called doc/pcre.txt, doc/pcregrep.txt, and
     doc/pcretest.txt in the source distribution. The first of these is a
     concatenation of the text forms of all the section 3 man pages except
     those that summarize individual functions. The other two are the text
     forms of the section 1 man pages for the pcregrep and pcretest commands.
     These text forms are provided for ease of scanning with text editors or
     similar tools. They are installed in <prefix>/share/doc/pcre, where
     <prefix> is the installation prefix (defaulting to /usr/local).

  2. A set of files containing all the documentation in HTML form, hyperlinked
     in various ways, and rooted in a file called index.html, is distributed in
     doc/html and installed in <prefix>/share/doc/pcre/html.

Users of PCRE have contributed files containing the documentation for various
releases in CHM format. These can be found in the Contrib directory of the FTP
site (see next section).


Contributions by users of PCRE
------------------------------

You can find contributions from PCRE users in the directory

  ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib

There is a README file giving brief descriptions of what they are. Some are
complete in themselves; others are pointers to URLs containing relevant files.
Some of this material is likely to be well out-of-date. Several of the earlier
contributions provided support for compiling PCRE on various flavours of
Windows (I myself do not use Windows). Nowadays there is more Windows support
in the standard distribution, so these contributions have been archived.


Building PCRE on non-Unix systems
---------------------------------

For a non-Unix system, please read the comments in the file NON-UNIX-USE,
though if your system supports the use of "configure" and "make" you may be
able to build PCRE in the same way as for Unix-like systems. PCRE can also be
configured in many platform environments using the GUI facility provided by
CMake's cmake-gui command. This creates Makefiles, solution files, etc.

PCRE has been compiled on many different operating systems. It should be
straightforward to build PCRE on any system that has a Standard C compiler and
library, because it uses only Standard C functions.


Building PCRE on Unix-like systems
----------------------------------

If you are using HP's ANSI C++ compiler (aCC), please see the special note
in the section entitled "Using HP's ANSI C++ compiler (aCC)" below.

The following instructions assume the use of the widely used "configure, make,
make install" process. There is also support for CMake in the PCRE
distribution; there are some comments about using CMake in the NON-UNIX-USE
file, though it can also be used in Unix-like systems.

To build PCRE on a Unix-like system, first run the "configure" command from the
PCRE distribution directory, with your current directory set to the directory
where you want the files to be created. This command is a standard GNU
"autoconf" configuration script, for which generic instructions are supplied in
the file INSTALL.

Most commonly, people build PCRE within its own distribution directory, and in
this case, on many systems, just running "./configure" is sufficient. However,
the usual methods of changing standard defaults are available. For example:

CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local

specifies that the C compiler should be run with the flags '-O2 -Wall' instead
of the default, and that "make install" should install PCRE under /opt/local
instead of the default /usr/local.

If you want to build in a different directory, just run "configure" with that
directory as current. For example, suppose you have unpacked the PCRE source
into /source/pcre/pcre-xxx, but you want to build it in /build/pcre/pcre-xxx:

cd /build/pcre/pcre-xxx
/source/pcre/pcre-xxx/configure

PCRE is written in C and is normally compiled as a C library. However, it is
possible to build it as a C++ library, though the provided building apparatus
does not have any features to support this.

There are some optional features that can be included or omitted from the PCRE
library. You can read more about them in the pcrebuild man page.

. If you want to suppress the building of the C++ wrapper library, you can add
  --disable-cpp to the "configure" command. Otherwise, when "configure" is run,
  it will try to find a C++ compiler and C++ header files, and if it succeeds,
  it will try to build the C++ wrapper.

. If you want to make use of the support for UTF-8 Unicode character strings in
  PCRE, you must add --enable-utf8 to the "configure" command. Without it, the
  code for handling UTF-8 is not included in the library. Even when included,
  it still has to be enabled by an option at run time. When PCRE is compiled
  with this option, its input can only be either ASCII or UTF-8, even when
  running on EBCDIC platforms. It is not possible to use both --enable-utf8 and
  --enable-ebcdic at the same time.

. If, in addition to support for UTF-8 character strings, you want to include
  support for the \P, \p, and \X sequences that recognize Unicode character
  properties, you must add --enable-unicode-properties to the "configure"
  command. This adds about 30K to the size of the library (in the form of a
  property table); only the basic two-letter properties such as Lu are
  supported.

. You can build PCRE to recognize either CR or LF or the sequence CRLF or any
  of the preceding, or any of the Unicode newline sequences as indicating the
  end of a line. Whatever you specify at build time is the default; the caller
  of PCRE can change the selection at run time. The default newline indicator
  is a single LF character (the Unix standard). You can specify the default
  newline indicator by adding --enable-newline-is-cr or --enable-newline-is-lf
  or --enable-newline-is-crlf or --enable-newline-is-anycrlf or
  --enable-newline-is-any to the "configure" command, respectively.

  If you specify --enable-newline-is-cr or --enable-newline-is-crlf, some of
  the standard tests will fail, because the lines in the test files end with
  LF. Even if the files are edited to change the line endings, there are likely
  to be some failures. With --enable-newline-is-anycrlf or
  --enable-newline-is-any, many tests should succeed, but there may be some
  failures.

. By default, the sequence \R in a pattern matches any Unicode line ending
  sequence. This is independent of the option specifying what PCRE considers to
  be the end of a line (see above). However, the caller of PCRE can restrict \R
  to match only CR, LF, or CRLF. You can make this the default by adding
  --enable-bsr-anycrlf to the "configure" command (bsr = "backslash R").

. When called via the POSIX interface, PCRE uses malloc() to get additional
  storage for processing capturing parentheses if there are more than 10 of
  them in a pattern. You can increase this threshold by setting, for example,

  --with-posix-malloc-threshold=20

  on the "configure" command.

. PCRE has a counter that can be set to limit the amount of resources it uses.
  If the limit is exceeded during a match, the match fails. The default is ten
  million. You can change the default by setting, for example,

  --with-match-limit=500000

  on the "configure" command. This is just the default; individual calls to
  pcre_exec() can supply their own value. There is more discussion on the
  pcreapi man page.

. There is a separate counter that limits the depth of recursive function calls
  during a matching process. This also has a default of ten million, which is
  essentially "unlimited". You can change the default by setting, for example,

  --with-match-limit-recursion=500000

  Recursive function calls use up the runtime stack; running out of stack can
  cause programs to crash in strange ways. There is a discussion about stack
  sizes in the pcrestack man page.

. The default maximum compiled pattern size is around 64K. You can increase
  this by adding --with-link-size=3 to the "configure" command. You can
  increase it even more by setting --with-link-size=4, but this is unlikely
  ever to be necessary. Increasing the internal link size will reduce
  performance.

. You can build PCRE so that its internal match() function that is called from
  pcre_exec() does not call itself recursively. Instead, it uses memory blocks
  obtained from the heap via the special functions pcre_stack_malloc() and
  pcre_stack_free() to save data that would otherwise be saved on the stack. To
  build PCRE like this, use

  --disable-stack-for-recursion

  on the "configure" command. PCRE runs more slowly in this mode, but it may be
  necessary in environments with limited stack sizes. This applies only to the
  pcre_exec() function; it does not apply to pcre_dfa_exec(), which does not
  use deeply nested recursion. There is a discussion about stack sizes in the
  pcrestack man page.

. For speed, PCRE uses four tables for manipulating and identifying characters
  whose code point values are less than 256. By default, it uses a set of
  tables for ASCII encoding that is part of the distribution. If you specify

  --enable-rebuild-chartables

  a program called dftables is compiled and run in the default C locale when
  you obey "make". It builds a source file called pcre_chartables.c. If you do
  not specify this option, pcre_chartables.c is created as a copy of
  pcre_chartables.c.dist. See "Character tables" below for further information.

. It is possible to compile PCRE for use on systems that use EBCDIC as their
  character code (as opposed to ASCII) by specifying

  --enable-ebcdic

  This automatically implies --enable-rebuild-chartables (see above). However,
  when PCRE is built this way, it always operates in EBCDIC. It cannot support
  both EBCDIC and UTF-8.

. It is possible to compile pcregrep to use libz and/or libbz2, in order to
  read .gz and .bz2 files (respectively), by specifying one or both of

  --enable-pcregrep-libz
  --enable-pcregrep-libbz2

  Of course, the relevant libraries must be installed on your system.

. It is possible to compile pcretest so that it links with the libreadline
  library, by specifying

  --enable-pcretest-libreadline

  If this is done, when pcretest's input is from a terminal, it reads it using
  the readline() function. This provides line-editing and history facilities.
  Note that libreadline is GPL-licenced, so if you distribute a binary of
  pcretest linked in this way, there may be licensing issues.

  Setting this option causes the -lreadline option to be added to the pcretest
  build. In many operating environments with a system-installed readline
  library this is sufficient. However, in some environments (e.g. if an
  unmodified distribution version of readline is in use), it may be necessary
  to specify something like LIBS="-lncurses" as well. This is because, to quote
  the readline INSTALL, "Readline uses the termcap functions, but does not link
  with the termcap or curses library itself, allowing applications which link
  with readline the to choose an appropriate library." If you get error
  messages about missing functions tgetstr, tgetent, tputs, tgetflag, or tgoto,
  this is the problem, and linking with the ncurses library should fix it.

The "configure" script builds the following files for the basic C library:

. Makefile is the makefile that builds the library
. config.h contains build-time configuration options for the library
. pcre.h is the public PCRE header file
. pcre-config is a script that shows the settings of "configure" options
. libpcre.pc is data for the pkg-config command
. libtool is a script that builds shared and/or static libraries
. RunTest is a script for running tests on the basic C library
. RunGrepTest is a script for running tests on the pcregrep command

Versions of config.h and pcre.h are distributed in the PCRE tarballs under the
names config.h.generic and pcre.h.generic. These are provided for those who
have to build PCRE without using "configure" or CMake. If you use "configure"
or CMake, the .generic versions are not used.

If a C++ compiler is found, the following files are also built:

. libpcrecpp.pc is data for the pkg-config command
. pcrecpparg.h is a header file for programs that call PCRE via the C++ wrapper
. pcre_stringpiece.h is the header for the C++ "stringpiece" functions

The "configure" script also creates config.status, which is an executable
script that can be run to recreate the configuration, and config.log, which
contains compiler output from tests that "configure" runs.

Once "configure" has run, you can run "make". It builds two libraries, called
libpcre and libpcreposix, a test program called pcretest, and the pcregrep
command. If a C++ compiler was found on your system, "make" also builds the C++
wrapper library, which is called libpcrecpp, and some test programs called
pcrecpp_unittest, pcre_scanner_unittest, and pcre_stringpiece_unittest.
Building the C++ wrapper can be disabled by adding --disable-cpp to the
"configure" command.

The command "make check" runs all the appropriate tests. Details of the PCRE
tests are given below in a separate section of this document.

You can use "make install" to install PCRE into live directories on your
system. The following are installed (file names are all relative to the
<prefix> that is set when "configure" is run):

  Commands (bin):
    pcretest
    pcregrep
    pcre-config

  Libraries (lib):
    libpcre
    libpcreposix
    libpcrecpp (if C++ support is enabled)

  Configuration information (lib/pkgconfig):
    libpcre.pc
    libpcrecpp.pc (if C++ support is enabled)

  Header files (include):
    pcre.h
    pcreposix.h
    pcre_scanner.h      )
    pcre_stringpiece.h  ) if C++ support is enabled
    pcrecpp.h           )
    pcrecpparg.h        )

  Man pages (share/man/man{1,3}):
    pcregrep.1
    pcretest.1
    pcre.3
    pcre*.3 (lots more pages, all starting "pcre")

  HTML documentation (share/doc/pcre/html):
    index.html
    *.html (lots more pages, hyperlinked from index.html)

  Text file documentation (share/doc/pcre):
    AUTHORS
    COPYING
    ChangeLog
    LICENCE
    NEWS
    README
    pcre.txt       (a concatenation of the man(3) pages)
    pcretest.txt   the pcretest man page
    pcregrep.txt   the pcregrep man page

If you want to remove PCRE from your system, you can run "make uninstall".
This removes all the files that "make install" installed. However, it does not
remove any directories, because these are often shared with other programs.


Retrieving configuration information on Unix-like systems
---------------------------------------------------------

Running "make install" installs the command pcre-config, which can be used to
recall information about the PCRE configuration and installation. For example:

  pcre-config --version

prints the version number, and

  pcre-config --libs

outputs information about where the library is installed. This command can be
included in makefiles for programs that use PCRE, saving the programmer from
having to remember too many details.

The pkg-config command is another system for saving and retrieving information
about installed libraries. Instead of separate commands for each library, a
single command is used. For example:

  pkg-config --cflags pcre

The data is held in *.pc files that are installed in a directory called
<prefix>/lib/pkgconfig.


Shared libraries on Unix-like systems
-------------------------------------

The default distribution builds PCRE as shared libraries and static libraries,
as long as the operating system supports shared libraries. Shared library
support relies on the "libtool" script which is built as part of the
"configure" process.

The libtool script is used to compile and link both shared and static
libraries. They are placed in a subdirectory called .libs when they are newly
built. The programs pcretest and pcregrep are built to use these uninstalled
libraries (by means of wrapper scripts in the case of shared libraries). When
you use "make install" to install shared libraries, pcregrep and pcretest are
automatically re-built to use the newly installed shared libraries before being
installed themselves. However, the versions left in the build directory still
use the uninstalled libraries.

To build PCRE using static libraries only you must use --disable-shared when
configuring it. For example:

./configure --prefix=/usr/gnu --disable-shared

Then run "make" in the usual way. Similarly, you can use --disable-static to
build only shared libraries.


Cross-compiling on Unix-like systems
------------------------------------

You can specify CC and CFLAGS in the normal way to the "configure" command, in
order to cross-compile PCRE for some other host. However, you should NOT
specify --enable-rebuild-chartables, because if you do, the dftables.c source
file is compiled and run on the local host, in order to generate the inbuilt
character tables (the pcre_chartables.c file). This will probably not work,
because dftables.c needs to be compiled with the local compiler, not the cross
compiler.

When --enable-rebuild-chartables is not specified, pcre_chartables.c is created
by making a copy of pcre_chartables.c.dist, which is a default set of tables
that assumes ASCII code. Cross-compiling with the default tables should not be
a problem.

If you need to modify the character tables when cross-compiling, you should
move pcre_chartables.c.dist out of the way, then compile dftables.c by hand and
run it on the local host to make a new version of pcre_chartables.c.dist.
Then when you cross-compile PCRE this new version of the tables will be used.


Using HP's ANSI C++ compiler (aCC)
----------------------------------

Unless C++ support is disabled by specifying the "--disable-cpp" option of the
"configure" script, you must include the "-AA" option in the CXXFLAGS
environment variable in order for the C++ components to compile correctly.

Also, note that the aCC compiler on PA-RISC platforms may have a defect whereby
needed libraries fail to get included when specifying the "-AA" compiler
option. If you experience unresolved symbols when linking the C++ programs,
use the workaround of specifying the following environment variable prior to
running the "configure" script:

  CXXLDFLAGS="-lstd_v2 -lCsup_v2"


Using Sun's compilers for Solaris
---------------------------------

A user reports that the following configurations work on Solaris 9 sparcv9 and
Solaris 9 x86 (32-bit):

  Solaris 9 sparcv9: ./configure --disable-cpp CC=/bin/cc CFLAGS="-m64 -g"
  Solaris 9 x86:     ./configure --disable-cpp CC=/bin/cc CFLAGS="-g"


Using PCRE from MySQL
---------------------

On systems where both PCRE and MySQL are installed, it is possible to make use
of PCRE from within MySQL, as an alternative to the built-in pattern matching.
There is a web page that tells you how to do this:

  http://www.mysqludf.org/lib_mysqludf_preg/index.php


Making new tarballs
-------------------

The command "make dist" creates three PCRE tarballs, in tar.gz, tar.bz2, and
zip formats. The command "make distcheck" does the same, but then does a trial
build of the new distribution to ensure that it works.

If you have modified any of the man page sources in the doc directory, you
should first run the PrepareRelease script before making a distribution. This
script creates the .txt and HTML forms of the documentation from the man pages.


Testing PCRE
------------

To test the basic PCRE library on a Unix system, run the RunTest script that is
created by the configuring process. There is also a script called RunGrepTest
that tests the options of the pcregrep command. If the C++ wrapper library is
built, three test programs called pcrecpp_unittest, pcre_scanner_unittest, and
pcre_stringpiece_unittest are also built.

Both the scripts and all the program tests are run if you obey "make check" or
"make test". For other systems, see the instructions in NON-UNIX-USE.

The RunTest script runs the pcretest test program (which is documented in its
own man page) on each of the testinput files in the testdata directory in
turn, and compares the output with the contents of the corresponding testoutput
files. A file called testtry is used to hold the main output from pcretest
(testsavedregex is also used as a working file). To run pcretest on just one of
the test files, give its number as an argument to RunTest, for example:

  RunTest 2

The first test file can also be fed directly into the perltest.pl script to
check that Perl gives the same results. The only difference you should see is
in the first few lines, where the Perl version is given instead of the PCRE
version.

The second set of tests checks pcre_fullinfo(), pcre_info(), pcre_study(),
pcre_copy_substring(), pcre_get_substring(), pcre_get_substring_list(), error
detection, and run-time flags that are specific to PCRE, as well as the POSIX
wrapper API. It also uses the debugging flags to check some of the internals of
pcre_compile().

If you build PCRE with a locale setting that is not the standard C locale, the
character tables may be different (see next paragraph). In some cases, this may
cause failures in the second set of tests. For example, in a locale where the
isprint() function yields TRUE for characters in the range 128-255, the use of
[:isascii:] inside a character class defines a different set of characters, and
this shows up in this test as a difference in the compiled code, which is being
listed for checking. Where the comparison test output contains [\x00-\x7f] the
test will contain [\x00-\xff], and similarly in some other cases. This is not a
bug in PCRE.

The third set of tests checks pcre_maketables(), the facility for building a
set of character tables for a specific locale and using them instead of the
default tables. The tests make use of the "fr_FR" (French) locale. Before
running the test, the script checks for the presence of this locale by running
the "locale" command. If that command fails, or if it doesn't include "fr_FR"
in the list of available locales, the third test cannot be run, and a comment
is output to say why. If running this test produces instances of the error

  ** Failed to set locale "fr_FR"

in the comparison output, it means that locale is not available on your system,
despite being listed by "locale". This does not mean that PCRE is broken.

[If you are trying to run this test on Windows, you may be able to get it to
work by changing "fr_FR" to "french" everywhere it occurs. Alternatively, use
RunTest.bat. The version of RunTest.bat included with PCRE 7.4 and above uses
Windows versions of test 2. More info on using RunTest.bat is included in the
document entitled NON-UNIX-USE.]

The fourth test checks the UTF-8 support. It is not run automatically unless
PCRE is built with UTF-8 support. To do this you must set --enable-utf8 when
running "configure". This file can also be fed directly to the perltest.pl
script, provided you are running Perl 5.8 or higher.

The fifth test checks error handling with UTF-8 encoding, and internal UTF-8
features of PCRE that are not relevant to Perl.

The sixth test (which is Perl-5.10 compatible) checks the support for Unicode
character properties. It is not run automatically unless PCRE is built with
Unicode property support. To do this you must set --enable-unicode-properties
when running "configure".

The seventh, eighth, and ninth tests check the pcre_dfa_exec() alternative
matching function, in non-UTF-8 mode, UTF-8 mode, and UTF-8 mode with Unicode
property support, respectively. The eighth and ninth tests are not run
automatically unless PCRE is built with the relevant support.

The tenth test checks some internal offsets and code size features; it is run
only when the default "link size" of 2 is set (in other cases the sizes
change).

The eleventh test checks out features that are new in Perl 5.10, and the
twelfth test checks a number of internals and non-Perl features concerned with
Unicode property support. It is not run automatically unless PCRE is built with
Unicode property support. To do this you must set --enable-unicode-properties
when running "configure".


Character tables
----------------

For speed, PCRE uses four tables for manipulating and identifying characters
whose code point values are less than 256. The final argument of the
pcre_compile() function is a pointer to a block of memory containing the
concatenated tables. A call to pcre_maketables() can be used to generate a set
of tables in the current locale. If the final argument for pcre_compile() is
passed as NULL, a set of default tables that is built into the binary is used.

The source file called pcre_chartables.c contains the default set of tables. By
default, this is created as a copy of pcre_chartables.c.dist, which contains
tables for ASCII coding. However, if --enable-rebuild-chartables is specified
for ./configure, a different version of pcre_chartables.c is built by the
program dftables (compiled from dftables.c), which uses the ANSI C character
handling functions such as isalnum(), isalpha(), isupper(), islower(), etc. to
build the table sources. This means that the default C locale which is set for
your system will control the contents of these default tables. You can change
the default tables by editing pcre_chartables.c and then re-building PCRE. If
you do this, you should take care to ensure that the file does not get
automatically re-generated. The best way to do this is to move
pcre_chartables.c.dist out of the way and replace it with your customized
tables.

When the dftables program is run as a result of --enable-rebuild-chartables,
it uses the default C locale that is set on your system. It does not pay
attention to the LC_xxx environment variables. In other words, it uses the
system's default locale rather than whatever the compiling user happens to have
set. If you really do want to build a source set of character tables in a
locale that is specified by the LC_xxx variables, you can run the dftables
program by hand with the -L option. For example:

  ./dftables -L pcre_chartables.c.special

The first two 256-byte tables provide lower casing and case flipping functions,
respectively. The next table consists of three 32-byte bit maps which identify
digits, "word" characters, and white space, respectively. These are used when
building 32-byte bit maps that represent character classes for code points less
than 256.

The final 256-byte table has bits indicating various character types, as
follows:

    1   white space character
    2   letter
    4   decimal digit
    8   hexadecimal digit
   16   alphanumeric or '_'
  128   regular expression metacharacter or binary zero

You should not alter the set of characters that contain the 128 bit, as that
will cause PCRE to malfunction.


File manifest
-------------

The distribution should contain the following files:

(A) Source files of the PCRE library functions and their headers:

  dftables.c              auxiliary program for building pcre_chartables.c
                            when --enable-rebuild-chartables is specified

  pcre_chartables.c.dist  a default set of character tables that assume ASCII
                            coding; used, unless --enable-rebuild-chartables is
                            specified, by copying to pcre_chartables.c

  pcreposix.c             )
  pcre_compile.c          )
  pcre_config.c           )
  pcre_dfa_exec.c         )
  pcre_exec.c             )
  pcre_fullinfo.c         )
  pcre_get.c              ) sources for the functions in the library,
  pcre_globals.c          )   and some internal functions that they use
  pcre_info.c             )
  pcre_maketables.c       )
  pcre_newline.c          )
  pcre_ord2utf8.c         )
  pcre_refcount.c         )
  pcre_study.c            )
  pcre_tables.c           )
  pcre_try_flipped.c      )
  pcre_ucd.c              )
  pcre_valid_utf8.c       )
  pcre_version.c          )
  pcre_xclass.c           )
  pcre_printint.src       ) debugging function that is #included in pcretest,
                          )   and can also be #included in pcre_compile()
  pcre.h.in               template for pcre.h when built by "configure"
  pcreposix.h             header for the external POSIX wrapper API
  pcre_internal.h         header for internal use
  ucp.h                   header for Unicode property handling

  config.h.in             template for config.h, which is built by "configure"

  pcrecpp.h               public header file for the C++ wrapper
  pcrecpparg.h.in         template for another C++ header file
  pcre_scanner.h          public header file for C++ scanner functions
  pcrecpp.cc              )
  pcre_scanner.cc         ) source for the C++ wrapper library

  pcre_stringpiece.h.in   template for pcre_stringpiece.h, the header for the
                            C++ stringpiece functions
  pcre_stringpiece.cc     source for the C++ stringpiece functions

(B) Source files for programs that use PCRE:

  pcredemo.c              simple demonstration of coding calls to PCRE
  pcregrep.c              source of a grep utility that uses PCRE
  pcretest.c              comprehensive test program

(C) Auxiliary files:

  132html                 script to turn "man" pages into HTML
  AUTHORS                 information about the author of PCRE
  ChangeLog               log of changes to the code
  CleanTxt                script to clean nroff output for txt man pages
  Detrail                 script to remove trailing spaces
  HACKING                 some notes about the internals of PCRE
  INSTALL                 generic installation instructions
  LICENCE                 conditions for the use of PCRE
  COPYING                 the same, using GNU's standard name
  Makefile.in             ) template for Unix Makefile, which is built by
                          )   "configure"
  Makefile.am             ) the automake input that was used to create
                          )   Makefile.in
  NEWS                    important changes in this release
  NON-UNIX-USE            notes on building PCRE on non-Unix systems
  PrepareRelease          script to make preparations for "make dist"
  README                  this file
  RunTest                 a Unix shell script for running tests
  RunGrepTest             a Unix shell script for pcregrep tests
  aclocal.m4              m4 macros (generated by "aclocal")
  config.guess            ) files used by libtool,
  config.sub              )   used only when building a shared library
  configure               a configuring shell script (built by autoconf)
  configure.ac            ) the autoconf input that was used to build
                          )   "configure" and config.h
  depcomp                 ) script to find program dependencies, generated by
                          )   automake
  doc/*.3                 man page sources for PCRE
  doc/*.1                 man page sources for pcregrep and pcretest
  doc/index.html.src      the base HTML page
  doc/html/*              HTML documentation
  doc/pcre.txt            plain text version of the man pages
  doc/pcretest.txt        plain text documentation of test program
  doc/perltest.txt        plain text documentation of Perl test program
  install-sh              a shell script for installing files
  libpcre.pc.in           template for libpcre.pc for pkg-config
  libpcreposix.pc.in      template for libpcreposix.pc for pkg-config
  libpcrecpp.pc.in        template for libpcrecpp.pc for pkg-config
  ltmain.sh               file used to build a libtool script
  missing                 ) common stub for a few missing GNU programs while
                          )   installing, generated by automake
  mkinstalldirs           script for making install directories
  perltest.pl             Perl test program
  pcre-config.in          source of script which retains PCRE information
  pcrecpp_unittest.cc          )
  pcre_scanner_unittest.cc     ) test programs for the C++ wrapper
  pcre_stringpiece_unittest.cc )
  testdata/testinput*     test data for main library tests
  testdata/testoutput*    expected test results
  testdata/grep*          input and output for pcregrep tests

(D) Auxiliary files for cmake support

  cmake/COPYING-CMAKE-SCRIPTS
  cmake/FindPackageHandleStandardArgs.cmake
  cmake/FindReadline.cmake
  CMakeLists.txt
  config-cmake.h.in

(E) Auxiliary files for VPASCAL

  makevp.bat
  makevp_c.txt
  makevp_l.txt
  pcregexp.pas

(F) Auxiliary files for building PCRE "by hand"

  pcre.h.generic          ) a version of the public PCRE header file
                          )   for use in non-"configure" environments
  config.h.generic        ) a version of config.h for use in non-"configure"
                          )   environments

(G) Miscellaneous

  RunTest.bat            a script for running tests under Windows

Philip Hazel
Email local part: ph10
Email domain: cam.ac.uk
Last updated: 19 January 2010
usr/share/doc/alt-pcre802-devel/pcre-config.txt000064400000004172150403561460015227 0ustar00PCRE-CONFIG(1)                                                  PCRE-CONFIG(1)



NAME
       pcre-config - program to return PCRE configuration

SYNOPSIS

       pcre-config [--prefix] [--exec-prefix] [--version] [--libs]
            [--libs-posix] [--cflags] [--cflags-posix]


DESCRIPTION

       pcre-config  returns  the configuration of the installed PCRE libraries
       and the options required to compile a program to use them.


OPTIONS

       --prefix  Writes the directory prefix used in the PCRE installation for
                 architecture   independent   files  (/usr  on  many  systems,
                 /usr/local on some systems) to the standard output.

       --exec-prefix
                 Writes the directory prefix used in the PCRE installation for
                 architecture  dependent files (normally the same as --prefix)
                 to the standard output.

       --version Writes the version number of the installed PCRE libraries  to
                 the standard output.

       --libs    Writes  to  the  standard  output  the  command  line options
                 required to link with PCRE (-lpcre on many systems).

       --libs-posix
                 Writes to  the  standard  output  the  command  line  options
                 required  to  link  with  the  PCRE  posix  emulation library
                 (-lpcreposix -lpcre on many systems).

       --cflags  Writes to  the  standard  output  the  command  line  options
                 required  to  compile  files  that use PCRE (this may include
                 some -I options, but is blank on many systems).

       --cflags-posix
                 Writes to  the  standard  output  the  command  line  options
                 required  to  compile files that use the PCRE posix emulation
                 library (this may include some -I options, but  is  blank  on
                 many systems).


SEE ALSO

       pcre(3)


AUTHOR

       This  manual  page  was originally written by Mark Baker for the Debian
       GNU/Linux system. It has been slightly revised as a  generic  PCRE  man
       page.


REVISION

       Last updated: 18 April 2007
usr/share/doc/alt-pcre802-devel/pcregrep.txt000064400000070333150403561460014644 0ustar00PCREGREP(1)                                                        PCREGREP(1)


NAME
       pcregrep - a grep with Perl-compatible regular expressions.


SYNOPSIS
       pcregrep [options] [long options] [pattern] [path1 path2 ...]


DESCRIPTION

       pcregrep  searches  files  for  character  patterns, in the same way as
       other grep commands do, but it uses the PCRE regular expression library
       to support patterns that are compatible with the regular expressions of
       Perl 5. See pcrepattern(3) for a full description of syntax and  seman-
       tics of the regular expressions that PCRE supports.

       Patterns,  whether  supplied on the command line or in a separate file,
       are given without delimiters. For example:

         pcregrep Thursday /etc/motd

       If you attempt to use delimiters (for example, by surrounding a pattern
       with  slashes,  as  is common in Perl scripts), they are interpreted as
       part of the pattern. Quotes can of course be used to  delimit  patterns
       on  the  command  line  because  they are interpreted by the shell, and
       indeed they are required if a pattern contains  white  space  or  shell
       metacharacters.

       The  first  argument that follows any option settings is treated as the
       single pattern to be matched when neither -e nor -f is  present.   Con-
       versely,  when  one  or  both of these options are used to specify pat-
       terns, all arguments are treated as path names. At least one of -e, -f,
       or an argument pattern must be provided.

       If no files are specified, pcregrep reads the standard input. The stan-
       dard input can also be referenced by a  name  consisting  of  a  single
       hyphen.  For example:

         pcregrep some-pattern /file1 - /file3

       By  default, each line that matches a pattern is copied to the standard
       output, and if there is more than one file, the file name is output  at
       the start of each line, followed by a colon. However, there are options
       that can change how pcregrep behaves.  In  particular,  the  -M  option
       makes  it  possible  to  search for patterns that span line boundaries.
       What defines a line  boundary  is  controlled  by  the  -N  (--newline)
       option.

       Patterns  are  limited  to  8K  or  BUFSIZ characters, whichever is the
       greater.  BUFSIZ is defined in <stdio.h>. When there is more  than  one
       pattern (specified by the use of -e and/or -f), each pattern is applied
       to each line in the order in which they are defined,  except  that  all
       the -e patterns are tried before the -f patterns.

       By  default,  as soon as one pattern matches (or fails to match when -v
       is used), no further patterns are considered. However, if --colour  (or
       --color) is used to colour the matching substrings, or if --only-match-
       ing, --file-offsets, or --line-offsets is used to output only the  part
       of  the  line  that  matched (either shown literally, or as an offset),
       scanning resumes immediately  following  the  match,  so  that  further
       matches  on the same line can be found. If there are multiple patterns,
       they are all tried on the remainder of the line, but patterns that fol-
       low the one that matched are not tried on the earlier part of the line.

       This is the same behaviour as GNU grep, but it does mean that the order
       in which multiple patterns are specified can affect the output when one
       of the above options is used.

       Patterns  that can match an empty string are accepted, but empty string
       matches   are   never   recognized.   An   example   is   the   pattern
       "(super)?(man)?",  in  which  all components are optional. This pattern
       finds all occurrences of both "super" and  "man";  the  output  differs
       from  matching  with  "super|man" when only the matching substrings are
       being shown.

       If the LC_ALL or LC_CTYPE environment variable is  set,  pcregrep  uses
       the  value to set a locale when calling the PCRE library.  The --locale
       option can be used to override this.


SUPPORT FOR COMPRESSED FILES

       It is possible to compile pcregrep so that it uses libz  or  libbz2  to
       read  files  whose names end in .gz or .bz2, respectively. You can find
       out whether your binary has support for one or both of these file types
       by running it with the --help option. If the appropriate support is not
       present, files are treated as plain text. The standard input is  always
       so treated.


OPTIONS

       The  order  in  which some of the options appear can affect the output.
       For example, both the -h and -l options affect  the  printing  of  file
       names.  Whichever  comes later in the command line will be the one that
       takes effect.

       --        This terminates the list of options. It is useful if the next
                 item  on  the command line starts with a hyphen but is not an
                 option. This allows for the processing of patterns and  file-
                 names that start with hyphens.

       -A number, --after-context=number
                 Output  number  lines of context after each matching line. If
                 filenames and/or line numbers are being output, a hyphen sep-
                 arator  is  used  instead of a colon for the context lines. A
                 line containing "--" is output between each group  of  lines,
                 unless  they  are  in  fact contiguous in the input file. The
                 value of number is expected to be relatively small.  However,
                 pcregrep guarantees to have up to 8K of following text avail-
                 able for context output.

       -B number, --before-context=number
                 Output number lines of context before each matching line.  If
                 filenames and/or line numbers are being output, a hyphen sep-
                 arator is used instead of a colon for the  context  lines.  A
                 line  containing  "--" is output between each group of lines,
                 unless they are in fact contiguous in  the  input  file.  The
                 value  of number is expected to be relatively small. However,
                 pcregrep guarantees to have up to 8K of preceding text avail-
                 able for context output.

       -C number, --context=number
                 Output  number  lines  of  context both before and after each
                 matching line.  This is equivalent to setting both -A and  -B
                 to the same value.

       -c, --count
                 Do  not output individual lines from the files that are being
                 scanned; instead output the number of lines that would other-
                 wise  have  been  shown. If no lines are selected, the number
                 zero is output. If several  files  are  being  scanned,  a
                 count  is  output  for each of them. However, if the --files-
                 with-matches option is also  used,  only  those  files  whose
                 counts are greater than zero are listed. When -c is used, the
                 -A, -B, and -C options are ignored.

       --colour, --color
                 If this option is given without any data, it is equivalent to
                 "--colour=auto".   If  data  is required, it must be given in
                 the same shell item, separated by an equals sign.

       --colour=value, --color=value
                 This option specifies under what circumstances the parts of a
                 line that matched a pattern should be coloured in the output.
                 By default, the output is not coloured. The value  (which  is
                 optional,  see above) may be "never", "always", or "auto". In
                 the latter case, colouring happens only if the standard  out-
                 put  is connected to a terminal. More resources are used when
                 colouring is enabled, because pcregrep has to search for  all
                 possible  matches in a line, not just one, in order to colour
                 them all.

                 The colour that is used can be specified by setting the envi-
                 ronment variable PCREGREP_COLOUR or PCREGREP_COLOR. The value
                 of this variable should be a string of two numbers, separated
                 by  a  semicolon.  They  are copied directly into the control
                 string for setting colour  on  a  terminal,  so  it  is  your
                 responsibility  to ensure that they make sense. If neither of
                 the environment variables is  set,  the  default  is  "1;31",
                 which gives red.

       -D action, --devices=action
                 If  an  input  path  is  not  a  regular file or a directory,
                 "action" specifies how it is to be  processed.  Valid  values
                 are "read" (the default) or "skip" (silently skip the path).

       -d action, --directories=action
                 If an input path is a directory, "action" specifies how it is
                 to be processed.  Valid  values  are  "read"  (the  default),
                 "recurse"  (equivalent to the -r option), or "skip" (silently
                 skip the path). In the default case, directories are read  as
                 if  they  were  ordinary files. In some operating systems the
                 effect of reading a directory like this is an immediate  end-
                 of-file.

       -e pattern, --regex=pattern, --regexp=pattern
                 Specify a pattern to be matched. This option can be used mul-
                 tiple times in order to specify several patterns. It can also
                 be  used  as a way of specifying a single pattern that starts
                 with a hyphen. When -e is used, no argument pattern is  taken
                 from  the  command  line;  all  arguments are treated as file
                 names. There is an overall maximum of 100 patterns. They  are
                 applied  to  each line in the order in which they are defined
                 until one matches (or fails to match if -v is used). If -f is
                 used  with  -e,  the command line patterns are matched first,
                 followed by the patterns from the file,  independent  of  the
                 order  in which these options are specified. Note that multi-
                 ple use of -e is not the same as a single pattern with alter-
                 natives. For example, X|Y finds the first character in a line
                 that is X or Y, whereas if the two patterns are  given  sepa-
                 rately, pcregrep finds X if it is present, even if it follows
                 Y in the line. It finds Y only if there is no X in the  line.
                 This  really  matters  only  if  you are using -o to show the
                 part(s) of the line that matched.

       --exclude=pattern
                 When pcregrep is searching the files in a directory as a con-
                 sequence  of  the  -r  (recursive search) option, any regular
                 files whose names match the pattern are excluded. Subdirecto-
                 ries  are  not  excluded  by  this  option; they are searched
                 recursively, subject to the --exclude_dir  and  --include_dir
                 options.  The  pattern  is  a PCRE regular expression, and is
                 matched against the final component of the file name (not the
                 entire  path).  If  a  file  name  matches both --include and
                 --exclude, it is excluded.  There is no short form  for  this
                 option.

       --exclude_dir=pattern
                 When  pcregrep  is searching the contents of a directory as a
                 consequence of the -r (recursive search) option,  any  subdi-
                 rectories  whose  names match the pattern are excluded. (Note
                 that the --exclude option does  not  affect  subdirectories.)
                 The  pattern  is  a  PCRE  regular expression, and is matched
                 against the final component  of  the  name  (not  the  entire
                 path).  If a subdirectory name matches both --include_dir and
                 --exclude_dir, it is excluded. There is  no  short  form  for
                 this option.

       -F, --fixed-strings
                 Interpret  each pattern as a list of fixed strings, separated
                 by newlines, instead of  as  a  regular  expression.  The  -w
                 (match  as  a  word) and -x (match whole line) options can be
                 used with -F. They apply to each of the fixed strings. A line
                 is selected if any of the fixed strings are found in it (sub-
                 ject to -w or -x, if present).

       -f filename, --file=filename
                 Read a number of patterns from the file, one  per  line,  and
                 match  them against each line of input. A data line is output
                 if any of the patterns match it. The filename can be given as
                 "-" to refer to the standard input. When -f is used, patterns
                 specified on the command line using -e may also  be  present;
                 they are tested before the file's patterns. However, no other
                 pattern is taken from the command  line;  all  arguments  are
                 treated  as  file  names.  There is an overall maximum of 100
                 patterns. Trailing white space is removed from each line, and
                 blank  lines  are ignored. An empty file contains no patterns
                 and therefore matches nothing. See also  the  comments  about
                 multiple  patterns  versus a single pattern with alternatives
                 in the description of -e above.

       --file-offsets
                 Instead of showing lines or parts of lines that  match,  show
                 each  match  as  an  offset  from the start of the file and a
                 length, separated by a comma. In this  mode,  no  context  is
                 shown.  That  is,  the -A, -B, and -C options are ignored. If
                 there is more than one match in a line, each of them is shown
                 separately.  This  option  is mutually exclusive with --line-
                 offsets and --only-matching.

       -H, --with-filename
                 Force the inclusion of the filename at the  start  of  output
                 lines  when searching a single file. By default, the filename
                 is not shown in this case. For matching lines,  the  filename
                 is followed by a colon; for context lines, a hyphen separator
                 is used. If a line number is also being  output,  it  follows
                 the file name.

       -h, --no-filename
                 Suppress  the output filenames when searching multiple files.
                 By default, filenames  are  shown  when  multiple  files  are
                 searched.  For  matching lines, the filename is followed by a
                 colon; for context lines, a hyphen separator is used.   If  a
                 line number is also being output, it follows the file name.

       --help    Output  a  help  message, giving brief details of the command
                 options and file type support, and then exit.

       -i, --ignore-case
                 Ignore upper/lower case distinctions during comparisons.

       --include=pattern
                 When pcregrep is searching the files in a directory as a con-
                 sequence of the -r (recursive search) option, only those reg-
                 ular files whose names match the pattern are included. Subdi-
                 rectories  are always included and searched recursively, sub-
                 ject to the --include_dir and --exclude_dir options. The pat-
                 tern is a PCRE regular expression, and is matched against the
                 final component of the file name (not the entire path). If  a
                 file  name  matches  both  --include  and  --exclude,  it  is
                 excluded. There is no short form for this option.

       --include_dir=pattern
                 When pcregrep is searching the contents of a directory  as  a
                 consequence  of  the -r (recursive search) option, only those
                 subdirectories whose names match the  pattern  are  included.
                 (Note  that  the --include option does not affect subdirecto-
                 ries.) The pattern is  a  PCRE  regular  expression,  and  is
                 matched  against  the  final  component  of the name (not the
                 entire  path).  If   a   subdirectory   name   matches   both
                 --include_dir  and --exclude_dir, it is excluded. There is no
                 short form for this option.

       -L, --files-without-match
                 Instead of outputting lines from the files, just  output  the
                 names  of  the files that do not contain any lines that would
                 have been output. Each file name is output once, on  a  sepa-
                 rate line.

       -l, --files-with-matches
                 Instead  of  outputting lines from the files, just output the
                 names of the files containing lines that would have been out-
                 put.  Each  file  name  is  output  once, on a separate line.
                 Searching normally stops as soon as a matching line is  found
                 in  a  file.  However, if the -c (count) option is also used,
                 matching continues in order to obtain the correct count,  and
                 those  files  that  have  at least one match are listed along
                 with their counts. Using this option with -c is a way of sup-
                 pressing the listing of files with no matches.

       --label=name
                 This option supplies a name to be used for the standard input
                 when file names are being output. If not supplied, "(standard
                 input)" is used. There is no short form for this option.

       --line-offsets
                 Instead  of  showing lines or parts of lines that match, show
                 each match as a line number, the offset from the start of the
                 line,  and a length. The line number is terminated by a colon
                 (as usual; see the -n option), and the offset and length  are
                 separated  by  a  comma.  In  this mode, no context is shown.
                 That is, the -A, -B, and -C options are ignored. If there  is
                 more  than  one  match in a line, each of them is shown sepa-
                 rately. This option is mutually exclusive with --file-offsets
                 and --only-matching.

       --locale=locale-name
                 This  option specifies a locale to be used for pattern match-
                 ing. It overrides the value in the LC_ALL or  LC_CTYPE  envi-
                 ronment  variables.  If  no  locale  is  specified,  the PCRE
                 library's default (usually the "C" locale) is used. There  is
                 no short form for this option.

       -M, --multiline
                 Allow  patterns to match more than one line. When this option
                 is given, patterns may usefully contain literal newline char-
                 acters  and  internal  occurrences of ^ and $ characters. The
                 output for any one match may consist of more than  one  line.
                 When  this option is set, the PCRE library is called in "mul-
                 tiline" mode.  There is a limit to the number of  lines  that
                 can  be matched, imposed by the way that pcregrep buffers the
                 input file as it scans it. However, pcregrep ensures that  at
                 least 8K characters or the rest of the document (whichever is
                 the shorter) are available for forward  matching,  and  simi-
                 larly the previous 8K characters (or all the previous charac-
                 ters, if fewer than 8K) are guaranteed to  be  available  for
                 lookbehind assertions.

       -N newline-type, --newline=newline-type
                 The  PCRE  library  supports  five  different conventions for
                 indicating the ends of lines. They are  the  single-character
                 sequences  CR  (carriage  return) and LF (linefeed), the two-
                 character sequence CRLF, an "anycrlf" convention, which  rec-
                 ognizes  any  of the preceding three types, and an "any" con-
                 vention, in which any Unicode line ending sequence is assumed
                 to  end a line. The Unicode sequences are the three just men-
                 tioned,  plus  VT  (vertical  tab,  U+000B),  FF   (formfeed,
                 U+000C),   NEL  (next  line,  U+0085),  LS  (line  separator,
                 U+2028), and PS (paragraph separator, U+2029).

                 When  the  PCRE  library  is  built,  a  default  line-ending
                 sequence   is  specified.   This  is  normally  the  standard
                 sequence for the operating system. Unless otherwise specified
                 by  this  option,  pcregrep  uses the library's default.  The
                 possible values for this option are CR, LF, CRLF, ANYCRLF, or
                 ANY.  This  makes  it  possible to use pcregrep on files that
                 have come from other environments without  having  to  modify
                 their  line  endings.  If the data that is being scanned does
                 not agree with the convention set by  this  option,  pcregrep
                 may behave in strange ways.

       -n, --line-number
                 Precede each output line by its line number in the file, fol-
                 lowed by a colon for matching lines or a hyphen  for  context
                 lines.  If the filename is also being output, it precedes the
                 line number. This option is forced if --line-offsets is used.

       -o, --only-matching
                 Show only the part of the line that  matched  a  pattern.  In
                 this  mode,  no context is shown. That is, the -A, -B, and -C
                 options are ignored. If there is more than  one  match  in  a
                 line,  each  of  them  is shown separately. If -o is combined
                 with -v (invert the sense of the match to  find  non-matching
                 lines),  no  output  is generated, but the return code is set
                 appropriately. This option is mutually exclusive with --file-
                 offsets and --line-offsets.

       -q, --quiet
                 Work quietly, that is, display nothing except error messages.
                 The exit status indicates whether or  not  any  matches  were
                 found.

       -r, --recursive
                 If  any given path is a directory, recursively scan the files
                 it contains, taking note of any --include and --exclude  set-
                 tings.  By  default, a directory is read as a normal file; in
                 some operating systems this gives an  immediate  end-of-file.
                 This  option  is  a  shorthand  for  setting the -d option to
                 "recurse".

       -s, --no-messages
                 Suppress error  messages  about  non-existent  or  unreadable
                 files.  Such  files  are quietly skipped. However, the return
                 code is still 2, even if matches were found in other files.

       -u, --utf-8
                 Operate in UTF-8 mode. This option is available only if  PCRE
                 has  been compiled with UTF-8 support. Both patterns and sub-
                 ject lines must be valid strings of UTF-8 characters.

       -V, --version
                 Write the version numbers of pcregrep and  the  PCRE  library
                 that is being used to the standard error stream.

       -v, --invert-match
                 Invert  the  sense  of  the match, so that lines which do not
                 match any of the patterns are the ones that are found.

       -w, --word-regex, --word-regexp
                 Force the patterns to match only whole words. This is equiva-
                 lent to having \b at the start and end of the pattern.

       -x, --line-regex, --line-regexp
                 Force  the  patterns to be anchored (each must start matching
                 at the beginning of a line) and in addition, require them  to
                 match  entire  lines.  This  is  equivalent to having ^ and $
                 characters at the start and end of each alternative branch in
                 every pattern.


ENVIRONMENT VARIABLES

       The  environment  variables  LC_ALL  and LC_CTYPE are examined, in that
       order, for a locale. The first one that is set is  used.  This  can  be
       overridden  by  the  --locale  option.  If  no  locale is set, the PCRE
       library's default (usually the "C" locale) is used.


NEWLINES

       The -N (--newline) option allows pcregrep to scan files with  different
       newline  conventions  from  the  default.  However, the setting of this
       option does not affect the way in which pcregrep writes information  to
       the  standard  error  and  output streams. It uses the string "\n" in C
       printf() calls to indicate newlines, relying on the C  I/O  library  to
       convert  this  to  an  appropriate  sequence if the output is sent to a
       file.


OPTIONS COMPATIBILITY

       The majority of short and long forms of pcregrep's options are the same
       as  in  the  GNU grep program. Any long option of the form --xxx-regexp
       (GNU terminology) is also available as --xxx-regex (PCRE  terminology).
       However,  the  --locale,  -M,  --multiline, -u, and --utf-8 options are
       specific to pcregrep. If both the -c and -l options are given, GNU grep
       lists only file names, without counts, but pcregrep gives the counts.


OPTIONS WITH DATA

       There are four different ways in which an option with data can be spec-
       ified.  If a short form option is used, the  data  may  follow  immedi-
       ately, or in the next command line item. For example:

         -f/some/file
         -f /some/file

       If  a long form option is used, the data may appear in the same command
       line item, separated by an equals character, or (with one exception) it
       may appear in the next command line item. For example:

         --file=/some/file
         --file /some/file

       Note,  however, that if you want to supply a file name beginning with ~
       as data in a shell command, and have the  shell  expand  ~  to  a  home
       directory, you must separate the file name from the option, because the
       shell does not treat ~ specially unless it is at the start of an item.

       The exception to the above is the --colour  (or  --color)  option,  for
       which  the  data is optional. If this option does have data, it must be
       given in the first form, using an equals character. Otherwise  it  will
       be assumed that it has no data.


MATCHING ERRORS

       It  is  possible  to supply a regular expression that takes a very long
       time to fail to match certain lines.  Such  patterns  normally  involve
       nested  indefinite repeats, for example: (a+)*\d when matched against a
       line of a's with no final digit.  The  PCRE  matching  function  has  a
       resource  limit that causes it to abort in these circumstances. If this
       happens, pcregrep outputs an error message and the line that caused the
       problem  to  the  standard error stream. If there are more than 20 such
       errors, pcregrep gives up.


DIAGNOSTICS

       Exit status is 0 if any matches were found, 1 if no matches were found,
       and 2 for syntax errors and non-existent or inaccessible files (even if
       matches were found in other files) or too many matching  errors.  Using
       the -s  option to suppress error messages about inaccessible files does
       not affect the return code.


SEE ALSO

       pcrepattern(3), pcretest(1).


AUTHOR

       Philip Hazel
       University Computing Service
       Cambridge CB2 3QH, England.


REVISION

       Last updated: 13 September 2009
       Copyright (c) 1997-2009 University of Cambridge.
usr/share/doc/alt-pcre802-devel/html/pcre_get_stringnumber.html000064400000002776150403561460020523 0ustar00<html>
<head>
<title>pcre_get_stringnumber specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_get_stringnumber man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
<b>const char *<i>name</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This convenience function finds the number of a named substring capturing
parenthesis in a compiled pattern. Its arguments are:
<pre>
  <i>code</i>    Compiled regular expression
  <i>name</i>    Name whose number is required
</pre>
The yield of the function is the number of the parenthesis if the name is
found, or PCRE_ERROR_NOSUBSTRING otherwise. When duplicate names are allowed
(PCRE_DUPNAMES is set), it is not defined which of the numbers is returned by
<b>pcre_get_stringnumber()</b>. You can obtain the complete list by calling
<b>pcre_get_stringtable_entries()</b>.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcresample.html000064400000006437150403561460016265 0ustar00<html>
<head>
<title>pcresample specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcresample man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
PCRE SAMPLE PROGRAM
</b><br>
<P>
A simple, complete demonstration program, to get you started with using PCRE,
is supplied in the file <i>pcredemo.c</i> in the PCRE distribution. A listing of
this program is given in the
<a href="pcredemo.html"><b>pcredemo</b></a>
documentation. If you do not have a copy of the PCRE distribution, you can save
this listing to re-create <i>pcredemo.c</i>.
</P>
<P>
The program compiles the regular expression that is its first argument, and
matches it against the subject string in its second argument. No PCRE options
are set, and default character tables are used. If matching succeeds, the
program outputs the portion of the subject that matched, together with the
contents of any captured substrings.
</P>
<P>
If the -g option is given on the command line, the program then goes on to
check for further matches of the same regular expression in the same subject
string. The logic is a little bit tricky because of the possibility of matching
an empty string. Comments in the code explain what is going on.
</P>
<P>
If PCRE is installed in the standard include and library directories for your
operating system, you should be able to compile the demonstration program using
this command:
<pre>
  gcc -o pcredemo pcredemo.c -lpcre
</pre>
If PCRE is installed elsewhere, you may need to add additional options to the
command line. For example, on a Unix-like system that has PCRE installed in
<i>/usr/local</i>, you can compile the demonstration program using a command
like this:
<pre>
  gcc -o pcredemo -I/usr/local/include pcredemo.c -L/usr/local/lib -lpcre
</pre>
Once you have compiled the demonstration program, you can run simple tests like
this:
<pre>
  ./pcredemo 'cat|dog' 'the cat sat on the mat'
  ./pcredemo -g 'cat|dog' 'the dog sat on the cat'
</pre>
Note that there is a much more comprehensive test program, called
<a href="pcretest.html"><b>pcretest</b>,</a>
which supports many more facilities for testing regular expressions and the
PCRE library. The
<a href="pcredemo.html"><b>pcredemo</b></a>
program is provided as a simple coding example.
</P>
<P>
When you try to run
<a href="pcredemo.html"><b>pcredemo</b></a>
when PCRE is not installed in the standard library directory, you may get an
error like this on some operating systems (e.g. Solaris):
<pre>
  ld.so.1: a.out: fatal: libpcre.so.0: open failed: No such file or directory
</pre>
This is caused by the way shared library support works on those systems. You
need to add
<pre>
  -R/usr/local/lib
</pre>
(for example) to the compile command to get round this problem.
</P>
<br><b>
AUTHOR
</b><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<br><b>
REVISION
</b><br>
<P>
Last updated: 30 September 2009
<br>
Copyright &copy; 1997-2009 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcrematching.html000064400000023061150403561460016566 0ustar00<html>
<head>
<title>pcrematching specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcrematching man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">PCRE MATCHING ALGORITHMS</a>
<li><a name="TOC2" href="#SEC2">REGULAR EXPRESSIONS AS TREES</a>
<li><a name="TOC3" href="#SEC3">THE STANDARD MATCHING ALGORITHM</a>
<li><a name="TOC4" href="#SEC4">THE ALTERNATIVE MATCHING ALGORITHM</a>
<li><a name="TOC5" href="#SEC5">ADVANTAGES OF THE ALTERNATIVE ALGORITHM</a>
<li><a name="TOC6" href="#SEC6">DISADVANTAGES OF THE ALTERNATIVE ALGORITHM</a>
<li><a name="TOC7" href="#SEC7">AUTHOR</a>
<li><a name="TOC8" href="#SEC8">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">PCRE MATCHING ALGORITHMS</a><br>
<P>
This document describes the two different algorithms that are available in PCRE
for matching a compiled regular expression against a given subject string. The
"standard" algorithm is the one provided by the <b>pcre_exec()</b> function.
This works in the same way as Perl's matching function, and provides a
Perl-compatible matching operation.
</P>
<P>
An alternative algorithm is provided by the <b>pcre_dfa_exec()</b> function;
this operates in a different way, and is not Perl-compatible. It has advantages
and disadvantages compared with the standard algorithm, and these are described
below.
</P>
<P>
When there is only one possible way in which a given subject string can match a
pattern, the two algorithms give the same answer. A difference arises, however,
when there are multiple possibilities. For example, if the pattern
<pre>
  ^&#60;.*&#62;
</pre>
is matched against the string
<pre>
  &#60;something&#62; &#60;something else&#62; &#60;something further&#62;
</pre>
there are three possible answers. The standard algorithm finds only one of
them, whereas the alternative algorithm finds all three.
</P>
<br><a name="SEC2" href="#TOC1">REGULAR EXPRESSIONS AS TREES</a><br>
<P>
The set of strings that are matched by a regular expression can be represented
as a tree structure. An unlimited repetition in the pattern makes the tree of
infinite size, but it is still a tree. Matching the pattern to a given subject
string (from a given starting point) can be thought of as a search of the tree.
There are two ways to search a tree: depth-first and breadth-first, and these
correspond to the two matching algorithms provided by PCRE.
</P>
<br><a name="SEC3" href="#TOC1">THE STANDARD MATCHING ALGORITHM</a><br>
<P>
In the terminology of Jeffrey Friedl's book "Mastering Regular
Expressions", the standard algorithm is an "NFA algorithm". It conducts a
depth-first search of the pattern tree. That is, it proceeds along a single
path through the tree, checking that the subject matches what is required. When
there is a mismatch, the algorithm tries any alternatives at the current point,
and if they all fail, it backs up to the previous branch point in the tree, and
tries the next alternative branch at that level. This often involves backing up
(moving to the left) in the subject string as well. The order in which
repetition branches are tried is controlled by the greedy or ungreedy nature of
the quantifier.
</P>
<P>
If a leaf node is reached, a matching string has been found, and at that point
the algorithm stops. Thus, if there is more than one possible match, this
algorithm returns the first one that it finds. Whether this is the shortest,
the longest, or some intermediate length depends on the way the greedy and
ungreedy repetition quantifiers are specified in the pattern.
</P>
<P>
Because it ends up with a single path through the tree, it is relatively
straightforward for this algorithm to keep track of the substrings that are
matched by portions of the pattern in parentheses. This provides support for
capturing parentheses and back references.
</P>
<br><a name="SEC4" href="#TOC1">THE ALTERNATIVE MATCHING ALGORITHM</a><br>
<P>
This algorithm conducts a breadth-first search of the tree. Starting from the
first matching point in the subject, it scans the subject string from left to
right, once, character by character, and as it does this, it remembers all the
paths through the tree that represent valid matches. In Friedl's terminology,
this is a kind of "DFA algorithm", though it is not implemented as a
traditional finite state machine (it keeps multiple states active
simultaneously).
</P>
<P>
Although the general principle of this matching algorithm is that it scans the
subject string only once, without backtracking, there is one exception: when a
lookaround assertion is encountered, the characters following or preceding the
current point have to be independently inspected.
</P>
<P>
The scan continues until either the end of the subject is reached, or there are
no more unterminated paths. At this point, terminated paths represent the
different matching possibilities (if there are none, the match has failed).
Thus, if there is more than one possible match, this algorithm finds all of
them, and in particular, it finds the longest. There is an option to stop the
algorithm after the first match (which is necessarily the shortest) is found.
</P>
<P>
Note that all the matches that are found start at the same point in the
subject. If the pattern
<pre>
  cat(er(pillar)?)
</pre>
is matched against the string "the caterpillar catchment", the result will be
the three strings "cat", "cater", and "caterpillar" that start at the fourth
character of the subject. The algorithm does not automatically move on to find
matches that start at later positions.
</P>
<P>
There are a number of features of PCRE regular expressions that are not
supported by the alternative matching algorithm. They are as follows:
</P>
<P>
1. Because the algorithm finds all possible matches, the greedy or ungreedy
nature of repetition quantifiers is not relevant. Greedy and ungreedy
quantifiers are treated in exactly the same way. However, possessive
quantifiers can make a difference when what follows could also match what is
quantified, for example in a pattern like this:
<pre>
  ^a++\w!
</pre>
This pattern matches "aaab!" but not "aaa!", which would be matched by a
non-possessive quantifier. Similarly, if an atomic group is present, it is
matched as if it were a standalone pattern at the current point, and the
longest match is then "locked in" for the rest of the overall pattern.
</P>
<P>
2. When dealing with multiple paths through the tree simultaneously, it is not
straightforward to keep track of captured substrings for the different matching
possibilities, and PCRE's implementation of this algorithm does not attempt to
do this. This means that no captured substrings are available.
</P>
<P>
3. Because no substrings are captured, back references within the pattern are
not supported, and cause errors if encountered.
</P>
<P>
4. For the same reason, conditional expressions that use a backreference as the
condition or test for a specific group recursion are not supported.
</P>
<P>
5. Because many paths through the tree may be active, the \K escape sequence,
which resets the start of the match when encountered (but may be on some paths
and not on others), is not supported. It causes an error if encountered.
</P>
<P>
6. Callouts are supported, but the value of the <i>capture_top</i> field is
always 1, and the value of the <i>capture_last</i> field is always -1.
</P>
<P>
7. The \C escape sequence, which (in the standard algorithm) matches a single
byte, even in UTF-8 mode, is not supported because the alternative algorithm
moves through the subject string one character at a time, for all active paths
through the tree.
</P>
<P>
8. Except for (*FAIL), the backtracking control verbs such as (*PRUNE) are not
supported. (*FAIL) is supported, and behaves like a failing negative assertion.
</P>
<br><a name="SEC5" href="#TOC1">ADVANTAGES OF THE ALTERNATIVE ALGORITHM</a><br>
<P>
Using the alternative matching algorithm provides the following advantages:
</P>
<P>
1. All possible matches (at a single point in the subject) are automatically
found, and in particular, the longest match is found. To find more than one
match using the standard algorithm, you have to do kludgy things with
callouts.
</P>
<P>
2. Because the alternative algorithm scans the subject string just once, and
never needs to backtrack, it is possible to pass very long subject strings to
the matching function in several pieces, checking for partial matching each
time. The
<a href="pcrepartial.html"><b>pcrepartial</b></a>
documentation gives details of partial matching.
</P>
<br><a name="SEC6" href="#TOC1">DISADVANTAGES OF THE ALTERNATIVE ALGORITHM</a><br>
<P>
The alternative algorithm suffers from a number of disadvantages:
</P>
<P>
1. It is substantially slower than the standard algorithm. This is partly
because it has to search for all possible matches, but is also because it is
less susceptible to optimization.
</P>
<P>
2. Capturing parentheses and back references are not supported.
</P>
<P>
3. Although atomic groups are supported, their use does not provide the
performance advantage that it does for the standard algorithm.
</P>
<br><a name="SEC7" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<br><a name="SEC8" href="#TOC1">REVISION</a><br>
<P>
Last updated: 29 September 2009
<br>
Copyright &copy; 1997-2009 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre_version.html000064400000001751150403561460016622 0ustar00<html>
<head>
<title>pcre_version specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_version man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>char *pcre_version(void);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function returns a character string that gives the version number of the
PCRE library and the date of its release.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcrestack.html000064400000017041150403561460016102 0ustar00<html>
<head>
<title>pcrestack specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcrestack man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
PCRE DISCUSSION OF STACK USAGE
</b><br>
<P>
When you call <b>pcre_exec()</b>, it makes use of an internal function called
<b>match()</b>. This calls itself recursively at branch points in the pattern,
in order to remember the state of the match so that it can back up and try a
different alternative if the first one fails. As matching proceeds deeper and
deeper into the tree of possibilities, the recursion depth increases.
</P>
<P>
Not all calls of <b>match()</b> increase the recursion depth; for an item such
as a* it may be called several times at the same level, after matching
different numbers of a's. Furthermore, in a number of cases where the result of
the recursive call would immediately be passed back as the result of the
current call (a "tail recursion"), the function is just restarted instead.
</P>
<P>
The <b>pcre_dfa_exec()</b> function operates in an entirely different way, and
uses recursion only when there is a regular expression recursion or subroutine
call in the pattern. This includes the processing of assertion and "once-only"
subpatterns, which are handled like subroutine calls. Normally, these are never
very deep, and the limit on the complexity of <b>pcre_dfa_exec()</b> is
controlled by the amount of workspace it is given. However, it is possible to
write patterns with runaway infinite recursions; such patterns will cause
<b>pcre_dfa_exec()</b> to run out of stack. At present, there is no protection
against this.
</P>
<P>
The comments that follow do NOT apply to <b>pcre_dfa_exec()</b>; they are
relevant only for <b>pcre_exec()</b>.
</P>
<br><b>
Reducing <b>pcre_exec()</b>'s stack usage
</b><br>
<P>
Each time that <b>match()</b> is actually called recursively, it uses memory
from the process stack. For certain kinds of pattern and data, very large
amounts of stack may be needed, despite the recognition of "tail recursion".
You can often reduce the amount of recursion, and therefore the amount of stack
used, by modifying the pattern that is being matched. Consider, for example,
this pattern:
<pre>
  ([^&#60;]|&#60;(?!inet))+
</pre>
It matches from wherever it starts until it encounters "&#60;inet" or the end of
the data, and is the kind of pattern that might be used when processing an XML
file. Each iteration of the outer parentheses matches either one character that
is not "&#60;" or a "&#60;" that is not followed by "inet". However, each time a
parenthesis is processed, a recursion occurs, so this formulation uses a stack
frame for each matched character. For a long string, a lot of stack is
required. Consider now this rewritten pattern, which matches exactly the same
strings:
<pre>
  ([^&#60;]++|&#60;(?!inet))+
</pre>
This uses very much less stack, because runs of characters that do not contain
"&#60;" are "swallowed" in one item inside the parentheses. Recursion happens only
when a "&#60;" character that is not followed by "inet" is encountered (and we
assume this is relatively rare). A possessive quantifier is used to stop any
backtracking into the runs of non-"&#60;" characters, but that is not related to
stack usage.
</P>
<P>
This example shows that one way of avoiding stack problems when matching long
subject strings is to write repeated parenthesized subpatterns to match more
than one character whenever possible.
</P>
<br><b>
Compiling PCRE to use heap instead of stack for <b>pcre_exec()</b>
</b><br>
<P>
In environments where stack memory is constrained, you might want to compile
PCRE to use heap memory instead of stack for remembering back-up points when
<b>pcre_exec()</b> is running. This makes it run a lot more slowly, however.
Details of how to do this are given in the
<a href="pcrebuild.html"><b>pcrebuild</b></a>
documentation. When built in this way, instead of using the stack, PCRE obtains
and frees memory by calling the functions that are pointed to by the
<b>pcre_stack_malloc</b> and <b>pcre_stack_free</b> variables. By default, these
point to <b>malloc()</b> and <b>free()</b>, but you can replace the pointers to
cause PCRE to use your own functions. Since the block sizes are always the
same, and are always freed in reverse order, it may be possible to implement
customized memory handlers that are more efficient than the standard functions.
</P>
<br><b>
Limiting <b>pcre_exec()</b>'s stack usage
</b><br>
<P>
You can set limits on the number of times that <b>match()</b> is called, both in
total and recursively. If a limit is exceeded, <b>pcre_exec()</b> returns an
error code. Setting suitable limits should prevent it from running out of
stack. The default values of the limits are very large, and unlikely ever to
operate. They can be changed when PCRE is built, and they can also be set when
<b>pcre_exec()</b> is called. For details of these interfaces, see the
<a href="pcrebuild.html"><b>pcrebuild</b></a>
documentation and the
<a href="pcreapi.html#extradata">section on extra data for <b>pcre_exec()</b></a>
in the
<a href="pcreapi.html"><b>pcreapi</b></a>
documentation.
</P>
<P>
As a very rough rule of thumb, you should reckon on about 500 bytes per
recursion. Thus, if you want to limit your stack usage to 8Mb, you
should set the limit at 16000 recursions. A 64Mb stack, on the other hand, can
support around 128000 recursions.
</P>
<P>
In Unix-like environments, the <b>pcretest</b> test program has a command line
option (<b>-S</b>) that can be used to increase the size of its stack. As long
as the stack is large enough, another option (<b>-M</b>) can be used to find the
smallest limits that allow a particular pattern to match a given subject
string. This is done by calling <b>pcre_exec()</b> repeatedly with different
limits.
</P>
<br><b>
Changing stack size in Unix-like systems
</b><br>
<P>
In Unix-like environments, there is not often a problem with the stack unless
very long strings are involved, though the default limit on stack size varies
from system to system. Values from 8Mb to 64Mb are common. You can find your
default limit by running the command:
<pre>
  ulimit -s
</pre>
Unfortunately, the effect of running out of stack is often SIGSEGV, though
sometimes a more explicit error message is given. You can normally increase the
limit on stack size by code such as this:
<pre>
  struct rlimit rlim;
  getrlimit(RLIMIT_STACK, &amp;rlim);
  rlim.rlim_cur = 100*1024*1024;
  setrlimit(RLIMIT_STACK, &amp;rlim);
</pre>
This reads the current limits (soft and hard) using <b>getrlimit()</b>, then
attempts to increase the soft limit to 100Mb using <b>setrlimit()</b>. You must
do this before calling <b>pcre_exec()</b>.
</P>
<br><b>
Changing stack size in Mac OS X
</b><br>
<P>
Using <b>setrlimit()</b>, as described above, should also work on Mac OS X. It
is also possible to set a stack size when linking a program. There is a
discussion about stack sizes in Mac OS X at this web site:
<a href="http://developer.apple.com/qa/qa2005/qa1419.html">http://developer.apple.com/qa/qa2005/qa1419.html.</a>
</P>
<br><b>
AUTHOR
</b><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<br><b>
REVISION
</b><br>
<P>
Last updated: 03 January 2010
<br>
Copyright &copy; 1997-2010 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcrepattern.html000064400000312427150403561460016460 0ustar00<html>
<head>
<title>pcrepattern specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcrepattern man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">PCRE REGULAR EXPRESSION DETAILS</a>
<li><a name="TOC2" href="#SEC2">NEWLINE CONVENTIONS</a>
<li><a name="TOC3" href="#SEC3">CHARACTERS AND METACHARACTERS</a>
<li><a name="TOC4" href="#SEC4">BACKSLASH</a>
<li><a name="TOC5" href="#SEC5">CIRCUMFLEX AND DOLLAR</a>
<li><a name="TOC6" href="#SEC6">FULL STOP (PERIOD, DOT)</a>
<li><a name="TOC7" href="#SEC7">MATCHING A SINGLE BYTE</a>
<li><a name="TOC8" href="#SEC8">SQUARE BRACKETS AND CHARACTER CLASSES</a>
<li><a name="TOC9" href="#SEC9">POSIX CHARACTER CLASSES</a>
<li><a name="TOC10" href="#SEC10">VERTICAL BAR</a>
<li><a name="TOC11" href="#SEC11">INTERNAL OPTION SETTING</a>
<li><a name="TOC12" href="#SEC12">SUBPATTERNS</a>
<li><a name="TOC13" href="#SEC13">DUPLICATE SUBPATTERN NUMBERS</a>
<li><a name="TOC14" href="#SEC14">NAMED SUBPATTERNS</a>
<li><a name="TOC15" href="#SEC15">REPETITION</a>
<li><a name="TOC16" href="#SEC16">ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS</a>
<li><a name="TOC17" href="#SEC17">BACK REFERENCES</a>
<li><a name="TOC18" href="#SEC18">ASSERTIONS</a>
<li><a name="TOC19" href="#SEC19">CONDITIONAL SUBPATTERNS</a>
<li><a name="TOC20" href="#SEC20">COMMENTS</a>
<li><a name="TOC21" href="#SEC21">RECURSIVE PATTERNS</a>
<li><a name="TOC22" href="#SEC22">SUBPATTERNS AS SUBROUTINES</a>
<li><a name="TOC23" href="#SEC23">ONIGURUMA SUBROUTINE SYNTAX</a>
<li><a name="TOC24" href="#SEC24">CALLOUTS</a>
<li><a name="TOC25" href="#SEC25">BACKTRACKING CONTROL</a>
<li><a name="TOC26" href="#SEC26">SEE ALSO</a>
<li><a name="TOC27" href="#SEC27">AUTHOR</a>
<li><a name="TOC28" href="#SEC28">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">PCRE REGULAR EXPRESSION DETAILS</a><br>
<P>
The syntax and semantics of the regular expressions that are supported by PCRE
are described in detail below. There is a quick-reference syntax summary in the
<a href="pcresyntax.html"><b>pcresyntax</b></a>
page. PCRE tries to match Perl syntax and semantics as closely as it can. PCRE
also supports some alternative regular expression syntax (which does not
conflict with the Perl syntax) in order to provide some compatibility with
regular expressions in Python, .NET, and Oniguruma.
</P>
<P>
Perl's regular expressions are described in its own documentation, and
regular expressions in general are covered in a number of books, some of which
have copious examples. Jeffrey Friedl's "Mastering Regular Expressions",
published by O'Reilly, covers regular expressions in great detail. This
description of PCRE's regular expressions is intended as reference material.
</P>
<P>
The original operation of PCRE was on strings of one-byte characters. However,
there is now also support for UTF-8 character strings. To use this,
PCRE must be built to include UTF-8 support, and you must call
<b>pcre_compile()</b> or <b>pcre_compile2()</b> with the PCRE_UTF8 option. There
is also a special sequence that can be given at the start of a pattern:
<pre>
  (*UTF8)
</pre>
Starting a pattern with this sequence is equivalent to setting the PCRE_UTF8
option. This feature is not Perl-compatible. How setting UTF-8 mode affects
pattern matching is mentioned in several places below. There is also a summary
of UTF-8 features in the
<a href="pcre.html#utf8support">section on UTF-8 support</a>
in the main
<a href="pcre.html"><b>pcre</b></a>
page.
</P>
<P>
The remainder of this document discusses the patterns that are supported by
PCRE when its main matching function, <b>pcre_exec()</b>, is used.
From release 6.0, PCRE offers a second matching function,
<b>pcre_dfa_exec()</b>, which matches using a different algorithm that is not
Perl-compatible. Some of the features discussed below are not available when
<b>pcre_dfa_exec()</b> is used. The advantages and disadvantages of the
alternative function, and how it differs from the normal function, are
discussed in the
<a href="pcrematching.html"><b>pcrematching</b></a>
page.
</P>
<br><a name="SEC2" href="#TOC1">NEWLINE CONVENTIONS</a><br>
<P>
PCRE supports five different conventions for indicating line breaks in
strings: a single CR (carriage return) character, a single LF (linefeed)
character, the two-character sequence CRLF, any of the three preceding, or any
Unicode newline sequence. The
<a href="pcreapi.html"><b>pcreapi</b></a>
page has
<a href="pcreapi.html#newlines">further discussion</a>
about newlines, and shows how to set the newline convention in the
<i>options</i> arguments for the compiling and matching functions.
</P>
<P>
It is also possible to specify a newline convention by starting a pattern
string with one of the following five sequences:
<pre>
  (*CR)        carriage return
  (*LF)        linefeed
  (*CRLF)      carriage return, followed by linefeed
  (*ANYCRLF)   any of the three above
  (*ANY)       all Unicode newline sequences
</pre>
These override the default and the options given to <b>pcre_compile()</b> or
<b>pcre_compile2()</b>. For example, on a Unix system where LF is the default
newline sequence, the pattern
<pre>
  (*CR)a.b
</pre>
changes the convention to CR. That pattern matches "a\nb" because LF is no
longer a newline. Note that these special settings, which are not
Perl-compatible, are recognized only at the very start of a pattern, and that
they must be in upper case. If more than one of them is present, the last one
is used.
</P>
<P>
The newline convention does not affect what the \R escape sequence matches. By
default, this is any Unicode newline sequence, for Perl compatibility. However,
this can be changed; see the description of \R in the section entitled
<a href="#newlineseq">"Newline sequences"</a>
below. A change of \R setting can be combined with a change of newline
convention.
</P>
<br><a name="SEC3" href="#TOC1">CHARACTERS AND METACHARACTERS</a><br>
<P>
A regular expression is a pattern that is matched against a subject string from
left to right. Most characters stand for themselves in a pattern, and match the
corresponding characters in the subject. As a trivial example, the pattern
<pre>
  The quick brown fox
</pre>
matches a portion of a subject string that is identical to itself. When
caseless matching is specified (the PCRE_CASELESS option), letters are matched
independently of case. In UTF-8 mode, PCRE always understands the concept of
case for characters whose values are less than 128, so caseless matching is
always possible. For characters with higher values, the concept of case is
supported if PCRE is compiled with Unicode property support, but not otherwise.
If you want to use caseless matching for characters 128 and above, you must
ensure that PCRE is compiled with Unicode property support as well as with
UTF-8 support.
</P>
<P>
The power of regular expressions comes from the ability to include alternatives
and repetitions in the pattern. These are encoded in the pattern by the use of
<i>metacharacters</i>, which do not stand for themselves but instead are
interpreted in some special way.
</P>
<P>
There are two different sets of metacharacters: those that are recognized
anywhere in the pattern except within square brackets, and those that are
recognized within square brackets. Outside square brackets, the metacharacters
are as follows:
<pre>
  \      general escape character with several uses
  ^      assert start of string (or line, in multiline mode)
  $      assert end of string (or line, in multiline mode)
  .      match any character except newline (by default)
  [      start character class definition
  |      start of alternative branch
  (      start subpattern
  )      end subpattern
  ?      extends the meaning of (
         also 0 or 1 quantifier
         also quantifier minimizer
  *      0 or more quantifier
  +      1 or more quantifier
         also "possessive quantifier"
  {      start min/max quantifier
</pre>
Part of a pattern that is in square brackets is called a "character class". In
a character class the only metacharacters are:
<pre>
  \      general escape character
  ^      negate the class, but only if the first character
  -      indicates character range
  [      POSIX character class (only if followed by POSIX syntax)
  ]      terminates the character class
</pre>
The following sections describe the use of each of the metacharacters.
</P>
<br><a name="SEC4" href="#TOC1">BACKSLASH</a><br>
<P>
The backslash character has several uses. Firstly, if it is followed by a
non-alphanumeric character, it takes away any special meaning that character
may have. This use of backslash as an escape character applies both inside and
outside character classes.
</P>
<P>
For example, if you want to match a * character, you write \* in the pattern.
This escaping action applies whether or not the following character would
otherwise be interpreted as a metacharacter, so it is always safe to precede a
non-alphanumeric with backslash to specify that it stands for itself. In
particular, if you want to match a backslash, you write \\.
</P>
<P>
If a pattern is compiled with the PCRE_EXTENDED option, whitespace in the
pattern (other than in a character class) and characters between a # outside
a character class and the next newline are ignored. An escaping backslash can
be used to include a whitespace or # character as part of the pattern.
</P>
<P>
If you want to remove the special meaning from a sequence of characters, you
can do so by putting them between \Q and \E. This is different from Perl in
that $ and @ are handled as literals in \Q...\E sequences in PCRE, whereas in
Perl, $ and @ cause variable interpolation. Note the following examples:
<pre>
  Pattern            PCRE matches   Perl matches

  \Qabc$xyz\E        abc$xyz        abc followed by the contents of $xyz
  \Qabc\$xyz\E       abc\$xyz       abc\$xyz
  \Qabc\E\$\Qxyz\E   abc$xyz        abc$xyz
</pre>
The \Q...\E sequence is recognized both inside and outside character classes.
<a name="digitsafterbackslash"></a></P>
<br><b>
Non-printing characters
</b><br>
<P>
A second use of backslash provides a way of encoding non-printing characters
in patterns in a visible manner. There is no restriction on the appearance of
non-printing characters, apart from the binary zero that terminates a pattern,
but when a pattern is being prepared by text editing, it is often easier to use
one of the following escape sequences than the binary character it represents:
<pre>
  \a        alarm, that is, the BEL character (hex 07)
  \cx       "control-x", where x is any character
  \e        escape (hex 1B)
  \f        formfeed (hex 0C)
  \n        linefeed (hex 0A)
  \r        carriage return (hex 0D)
  \t        tab (hex 09)
  \ddd      character with octal code ddd, or back reference
  \xhh      character with hex code hh
  \x{hhh..} character with hex code hhh..
</pre>
The precise effect of \cx is as follows: if x is a lower case letter, it
is converted to upper case. Then bit 6 of the character (hex 40) is inverted.
Thus \cz becomes hex 1A, but \c{ becomes hex 3B, while \c; becomes hex
7B.
</P>
<P>
After \x, from zero to two hexadecimal digits are read (letters can be in
upper or lower case). Any number of hexadecimal digits may appear between \x{
and }, but the value of the character code must be less than 256 in non-UTF-8
mode, and less than 2**31 in UTF-8 mode. That is, the maximum value in
hexadecimal is 7FFFFFFF. Note that this is bigger than the largest Unicode code
point, which is 10FFFF.
</P>
<P>
If characters other than hexadecimal digits appear between \x{ and }, or if
there is no terminating }, this form of escape is not recognized. Instead, the
initial \x will be interpreted as a basic hexadecimal escape, with no
following digits, giving a character whose value is zero.
</P>
<P>
Characters whose value is less than 256 can be defined by either of the two
syntaxes for \x. There is no difference in the way they are handled. For
example, \xdc is exactly the same as \x{dc}.
</P>
<P>
After \0 up to two further octal digits are read. If there are fewer than two
digits, just those that are present are used. Thus the sequence \0\x\07
specifies two binary zeros followed by a BEL character (code value 7). Make
sure you supply two digits after the initial zero if the pattern character that
follows is itself an octal digit.
</P>
<P>
The handling of a backslash followed by a digit other than 0 is complicated.
Outside a character class, PCRE reads it and any following digits as a decimal
number. If the number is less than 10, or if there have been at least that many
previous capturing left parentheses in the expression, the entire sequence is
taken as a <i>back reference</i>. A description of how this works is given
<a href="#backreferences">later,</a>
following the discussion of
<a href="#subpattern">parenthesized subpatterns.</a>
</P>
<P>
Inside a character class, or if the decimal number is greater than 9 and there
have not been that many capturing subpatterns, PCRE re-reads up to three octal
digits following the backslash, and uses them to generate a data character. Any
subsequent digits stand for themselves. In non-UTF-8 mode, the value of a
character specified in octal must be less than \400. In UTF-8 mode, values up
to \777 are permitted. For example:
<pre>
  \040   is another way of writing a space
  \40    is the same, provided there are fewer than 40 previous capturing subpatterns
  \7     is always a back reference
  \11    might be a back reference, or another way of writing a tab
  \011   is always a tab
  \0113  is a tab followed by the character "3"
  \113   might be a back reference, otherwise the character with octal code 113
  \377   might be a back reference, otherwise the byte consisting entirely of 1 bits
  \81    is either a back reference, or a binary zero followed by the two characters "8" and "1"
</pre>
Note that octal values of 100 or greater must not be introduced by a leading
zero, because no more than three octal digits are ever read.
</P>
<P>
All the sequences that define a single character value can be used both inside
and outside character classes. In addition, inside a character class, the
sequence \b is interpreted as the backspace character (hex 08), and the
sequences \R and \X are interpreted as the characters "R" and "X",
respectively. Outside a character class, these sequences have different
meanings
<a href="#uniextseq">(see below).</a>
</P>
<br><b>
Absolute and relative back references
</b><br>
<P>
The sequence \g followed by an unsigned or a negative number, optionally
enclosed in braces, is an absolute or relative back reference. A named back
reference can be coded as \g{name}. Back references are discussed
<a href="#backreferences">later,</a>
following the discussion of
<a href="#subpattern">parenthesized subpatterns.</a>
</P>
<br><b>
Absolute and relative subroutine calls
</b><br>
<P>
For compatibility with Oniguruma, the non-Perl syntax \g followed by a name or
a number enclosed either in angle brackets or single quotes, is an alternative
syntax for referencing a subpattern as a "subroutine". Details are discussed
<a href="#onigurumasubroutines">later.</a>
Note that \g{...} (Perl syntax) and \g&#60;...&#62; (Oniguruma syntax) are <i>not</i>
synonymous. The former is a back reference; the latter is a
<a href="#subpatternsassubroutines">subroutine</a>
call.
</P>
<br><b>
Generic character types
</b><br>
<P>
Another use of backslash is for specifying generic character types. The
following are always recognized:
<pre>
  \d     any decimal digit
  \D     any character that is not a decimal digit
  \h     any horizontal whitespace character
  \H     any character that is not a horizontal whitespace character
  \s     any whitespace character
  \S     any character that is not a whitespace character
  \v     any vertical whitespace character
  \V     any character that is not a vertical whitespace character
  \w     any "word" character
  \W     any "non-word" character
</pre>
Each pair of escape sequences partitions the complete set of characters into
two disjoint sets. Any given character matches one, and only one, of each pair.
</P>
<P>
These character type sequences can appear both inside and outside character
classes. They each match one character of the appropriate type. If the current
matching point is at the end of the subject string, all of them fail, since
there is no character to match.
</P>
<P>
For compatibility with Perl, \s does not match the VT character (code 11).
This makes it different from the POSIX "space" class. The \s characters
are HT (9), LF (10), FF (12), CR (13), and space (32). If "use locale;" is
included in a Perl script, \s may match the VT character. In PCRE, it never
does.
</P>
<P>
In UTF-8 mode, characters with values greater than 128 never match \d, \s, or
\w, and always match \D, \S, and \W. This is true even when Unicode
character property support is available. These sequences retain their original
meanings from before UTF-8 support was available, mainly for efficiency
reasons. Note that this also affects \b, because it is defined in terms of \w
and \W.
</P>
<P>
The sequences \h, \H, \v, and \V are Perl 5.10 features. In contrast to the
other sequences, these do match certain high-valued codepoints in UTF-8 mode.
The horizontal space characters are:
<pre>
  U+0009     Horizontal tab
  U+0020     Space
  U+00A0     Non-break space
  U+1680     Ogham space mark
  U+180E     Mongolian vowel separator
  U+2000     En quad
  U+2001     Em quad
  U+2002     En space
  U+2003     Em space
  U+2004     Three-per-em space
  U+2005     Four-per-em space
  U+2006     Six-per-em space
  U+2007     Figure space
  U+2008     Punctuation space
  U+2009     Thin space
  U+200A     Hair space
  U+202F     Narrow no-break space
  U+205F     Medium mathematical space
  U+3000     Ideographic space
</pre>
The vertical space characters are:
<pre>
  U+000A     Linefeed
  U+000B     Vertical tab
  U+000C     Formfeed
  U+000D     Carriage return
  U+0085     Next line
  U+2028     Line separator
  U+2029     Paragraph separator
</PRE>
</P>
<P>
A "word" character is an underscore or any character less than 256 that is a
letter or digit. The definition of letters and digits is controlled by PCRE's
low-valued character tables, and may vary if locale-specific matching is taking
place (see
<a href="pcreapi.html#localesupport">"Locale support"</a>
in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page). For example, in a French locale such as "fr_FR" in Unix-like systems,
or "french" in Windows, some character codes greater than 128 are used for
accented letters, and these are matched by \w. The use of locales with Unicode
is discouraged.
<a name="newlineseq"></a></P>
<br><b>
Newline sequences
</b><br>
<P>
Outside a character class, by default, the escape sequence \R matches any
Unicode newline sequence. This is a Perl 5.10 feature. In non-UTF-8 mode \R is
equivalent to the following:
<pre>
  (?&#62;\r\n|\n|\x0b|\f|\r|\x85)
</pre>
This is an example of an "atomic group", details of which are given
<a href="#atomicgroup">below.</a>
This particular group matches either the two-character sequence CR followed by
LF, or one of the single characters LF (linefeed, U+000A), VT (vertical tab,
U+000B), FF (formfeed, U+000C), CR (carriage return, U+000D), or NEL (next
line, U+0085). The two-character sequence is treated as a single unit that
cannot be split.
</P>
<P>
In UTF-8 mode, two additional characters whose codepoints are greater than 255
are added: LS (line separator, U+2028) and PS (paragraph separator, U+2029).
Unicode character property support is not needed for these characters to be
recognized.
</P>
<P>
It is possible to restrict \R to match only CR, LF, or CRLF (instead of the
complete set of Unicode line endings) by setting the option PCRE_BSR_ANYCRLF
either at compile time or when the pattern is matched. (BSR is an abbreviation
for "backslash R".) This can be made the default when PCRE is built; if this is
the case, the other behaviour can be requested via the PCRE_BSR_UNICODE option.
It is also possible to specify these settings by starting a pattern string with
one of the following sequences:
<pre>
  (*BSR_ANYCRLF)   CR, LF, or CRLF only
  (*BSR_UNICODE)   any Unicode newline sequence
</pre>
These override the default and the options given to <b>pcre_compile()</b> or
<b>pcre_compile2()</b>, but they can be overridden by options given to
<b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>. Note that these special settings,
which are not Perl-compatible, are recognized only at the very start of a
pattern, and that they must be in upper case. If more than one of them is
present, the last one is used. They can be combined with a change of newline
convention, for example, a pattern can start with:
<pre>
  (*ANY)(*BSR_ANYCRLF)
</pre>
Inside a character class, \R matches the letter "R".
<a name="uniextseq"></a></P>
<br><b>
Unicode character properties
</b><br>
<P>
When PCRE is built with Unicode character property support, three additional
escape sequences that match characters with specific properties are available.
When not in UTF-8 mode, these sequences are of course limited to testing
characters whose codepoints are less than 256, but they do work in this mode.
The extra escape sequences are:
<pre>
  \p{<i>xx</i>}   a character with the <i>xx</i> property
  \P{<i>xx</i>}   a character without the <i>xx</i> property
  \X       an extended Unicode sequence
</pre>
The property names represented by <i>xx</i> above are limited to the Unicode
script names, the general category properties, and "Any", which matches any
character (including newline). Other properties such as "InMusicalSymbols" are
not currently supported by PCRE. Note that \P{Any} does not match any
characters, so always causes a match failure.
</P>
<P>
Sets of Unicode characters are defined as belonging to certain scripts. A
character from one of these sets can be matched using a script name. For
example:
<pre>
  \p{Greek}
  \P{Han}
</pre>
Those that are not part of an identified script are lumped together as
"Common". The current list of scripts is:
</P>
<P>
Arabic,
Armenian,
Avestan,
Balinese,
Bamum,
Bengali,
Bopomofo,
Braille,
Buginese,
Buhid,
Canadian_Aboriginal,
Carian,
Cham,
Cherokee,
Common,
Coptic,
Cuneiform,
Cypriot,
Cyrillic,
Deseret,
Devanagari,
Egyptian_Hieroglyphs,
Ethiopic,
Georgian,
Glagolitic,
Gothic,
Greek,
Gujarati,
Gurmukhi,
Han,
Hangul,
Hanunoo,
Hebrew,
Hiragana,
Imperial_Aramaic,
Inherited,
Inscriptional_Pahlavi,
Inscriptional_Parthian,
Javanese,
Kaithi,
Kannada,
Katakana,
Kayah_Li,
Kharoshthi,
Khmer,
Lao,
Latin,
Lepcha,
Limbu,
Linear_B,
Lisu,
Lycian,
Lydian,
Malayalam,
Meetei_Mayek,
Mongolian,
Myanmar,
New_Tai_Lue,
Nko,
Ogham,
Old_Italic,
Old_Persian,
Old_South_Arabian,
Old_Turkic,
Ol_Chiki,
Oriya,
Osmanya,
Phags_Pa,
Phoenician,
Rejang,
Runic,
Samaritan,
Saurashtra,
Shavian,
Sinhala,
Sundanese,
Syloti_Nagri,
Syriac,
Tagalog,
Tagbanwa,
Tai_Le,
Tai_Tham,
Tai_Viet,
Tamil,
Telugu,
Thaana,
Thai,
Tibetan,
Tifinagh,
Ugaritic,
Vai,
Yi.
</P>
<P>
Each character has exactly one general category property, specified by a
two-letter abbreviation. For compatibility with Perl, negation can be specified
by including a circumflex between the opening brace and the property name. For
example, \p{^Lu} is the same as \P{Lu}.
</P>
<P>
If only one letter is specified with \p or \P, it includes all the general
category properties that start with that letter. In this case, in the absence
of negation, the curly brackets in the escape sequence are optional; these two
examples have the same effect:
<pre>
  \p{L}
  \pL
</pre>
The following general category property codes are supported:
<pre>
  C     Other
  Cc    Control
  Cf    Format
  Cn    Unassigned
  Co    Private use
  Cs    Surrogate

  L     Letter
  Ll    Lower case letter
  Lm    Modifier letter
  Lo    Other letter
  Lt    Title case letter
  Lu    Upper case letter

  M     Mark
  Mc    Spacing mark
  Me    Enclosing mark
  Mn    Non-spacing mark

  N     Number
  Nd    Decimal number
  Nl    Letter number
  No    Other number

  P     Punctuation
  Pc    Connector punctuation
  Pd    Dash punctuation
  Pe    Close punctuation
  Pf    Final punctuation
  Pi    Initial punctuation
  Po    Other punctuation
  Ps    Open punctuation

  S     Symbol
  Sc    Currency symbol
  Sk    Modifier symbol
  Sm    Mathematical symbol
  So    Other symbol

  Z     Separator
  Zl    Line separator
  Zp    Paragraph separator
  Zs    Space separator
</pre>
The special property L& is also supported: it matches a character that has
the Lu, Ll, or Lt property, in other words, a letter that is not classified as
a modifier or "other".
</P>
<P>
The Cs (Surrogate) property applies only to characters in the range U+D800 to
U+DFFF. Such characters are not valid in UTF-8 strings (see RFC 3629) and so
cannot be tested by PCRE, unless UTF-8 validity checking has been turned off
(see the discussion of PCRE_NO_UTF8_CHECK in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page). Perl does not support the Cs property.
</P>
<P>
The long synonyms for property names that Perl supports (such as \p{Letter})
are not supported by PCRE, nor is it permitted to prefix any of these
properties with "Is".
</P>
<P>
No character that is in the Unicode table has the Cn (unassigned) property.
Instead, this property is assumed for any code point that is not in the
Unicode table.
</P>
<P>
Specifying caseless matching does not affect these escape sequences. For
example, \p{Lu} always matches only upper case letters.
</P>
<P>
The \X escape matches any number of Unicode characters that form an extended
Unicode sequence. \X is equivalent to
<pre>
  (?&#62;\PM\pM*)
</pre>
That is, it matches a character without the "mark" property, followed by zero
or more characters with the "mark" property, and treats the sequence as an
atomic group
<a href="#atomicgroup">(see below).</a>
Characters with the "mark" property are typically accents that affect the
preceding character. None of them have codepoints less than 256, so in
non-UTF-8 mode \X matches any one character.
</P>
<P>
Matching characters by Unicode property is not fast, because PCRE has to search
a structure that contains data for over fifteen thousand characters. That is
why the traditional escape sequences such as \d and \w do not use Unicode
properties in PCRE.
<a name="resetmatchstart"></a></P>
<br><b>
Resetting the match start
</b><br>
<P>
The escape sequence \K, which is a Perl 5.10 feature, causes any previously
matched characters not to be included in the final matched sequence. For
example, the pattern:
<pre>
  foo\Kbar
</pre>
matches "foobar", but reports that it has matched "bar". This feature is
similar to a lookbehind assertion
<a href="#lookbehind">(described below).</a>
However, in this case, the part of the subject before the real match does not
have to be of fixed length, as lookbehind assertions do. The use of \K does
not interfere with the setting of
<a href="#subpattern">captured substrings.</a>
For example, when the pattern
<pre>
  (foo)\Kbar
</pre>
matches "foobar", the first substring is still set to "foo".
</P>
<P>
Perl documents that the use of \K within assertions is "not well defined". In
PCRE, \K is acted upon when it occurs inside positive assertions, but is
ignored in negative assertions.
<a name="smallassertions"></a></P>
<br><b>
Simple assertions
</b><br>
<P>
The final use of backslash is for certain simple assertions. An assertion
specifies a condition that has to be met at a particular point in a match,
without consuming any characters from the subject string. The use of
subpatterns for more complicated assertions is described
<a href="#bigassertions">below.</a>
The backslashed assertions are:
<pre>
  \b     matches at a word boundary
  \B     matches when not at a word boundary
  \A     matches at the start of the subject
  \Z     matches at the end of the subject
          also matches before a newline at the end of the subject
  \z     matches only at the end of the subject
  \G     matches at the first matching position in the subject
</pre>
These assertions may not appear in character classes (but note that \b has a
different meaning, namely the backspace character, inside a character class).
</P>
<P>
A word boundary is a position in the subject string where the current character
and the previous character do not both match \w or \W (i.e. one matches
\w and the other matches \W), or the start or end of the string if the
first or last character matches \w, respectively. Neither PCRE nor Perl has a
separate "start of word" or "end of word" metasequence. However, whatever
follows \b normally determines which it is. For example, the fragment
\ba matches "a" at the start of a word.
</P>
<P>
The \A, \Z, and \z assertions differ from the traditional circumflex and
dollar (described in the next section) in that they only ever match at the very
start and end of the subject string, whatever options are set. Thus, they are
independent of multiline mode. These three assertions are not affected by the
PCRE_NOTBOL or PCRE_NOTEOL options, which affect only the behaviour of the
circumflex and dollar metacharacters. However, if the <i>startoffset</i>
argument of <b>pcre_exec()</b> is non-zero, indicating that matching is to start
at a point other than the beginning of the subject, \A can never match. The
difference between \Z and \z is that \Z matches before a newline at the end
of the string as well as at the very end, whereas \z matches only at the end.
</P>
<P>
The \G assertion is true only when the current matching position is at the
start point of the match, as specified by the <i>startoffset</i> argument of
<b>pcre_exec()</b>. It differs from \A when the value of <i>startoffset</i> is
non-zero. By calling <b>pcre_exec()</b> multiple times with appropriate
arguments, you can mimic Perl's /g option, and it is in this kind of
implementation where \G can be useful.
</P>
<P>
Note, however, that PCRE's interpretation of \G, as the start of the current
match, is subtly different from Perl's, which defines it as the end of the
previous match. In Perl, these can be different when the previously matched
string was empty. Because PCRE does just one match at a time, it cannot
reproduce this behaviour.
</P>
<P>
If all the alternatives of a pattern begin with \G, the expression is anchored
to the starting match position, and the "anchored" flag is set in the compiled
regular expression.
</P>
<br><a name="SEC5" href="#TOC1">CIRCUMFLEX AND DOLLAR</a><br>
<P>
Outside a character class, in the default matching mode, the circumflex
character is an assertion that is true only if the current matching point is
at the start of the subject string. If the <i>startoffset</i> argument of
<b>pcre_exec()</b> is non-zero, circumflex can never match if the PCRE_MULTILINE
option is unset. Inside a character class, circumflex has an entirely different
meaning
<a href="#characterclass">(see below).</a>
</P>
<P>
Circumflex need not be the first character of the pattern if a number of
alternatives are involved, but it should be the first thing in each alternative
in which it appears if the pattern is ever to match that branch. If all
possible alternatives start with a circumflex, that is, if the pattern is
constrained to match only at the start of the subject, it is said to be an
"anchored" pattern. (There are also other constructs that can cause a pattern
to be anchored.)
</P>
<P>
A dollar character is an assertion that is true only if the current matching
point is at the end of the subject string, or immediately before a newline
at the end of the string (by default). Dollar need not be the last character of
the pattern if a number of alternatives are involved, but it should be the last
item in any branch in which it appears. Dollar has no special meaning in a
character class.
</P>
<P>
The meaning of dollar can be changed so that it matches only at the very end of
the string, by setting the PCRE_DOLLAR_ENDONLY option at compile time. This
does not affect the \Z assertion.
</P>
<P>
The meanings of the circumflex and dollar characters are changed if the
PCRE_MULTILINE option is set. When this is the case, a circumflex matches
immediately after internal newlines as well as at the start of the subject
string. It does not match after a newline that ends the string. A dollar
matches before any newlines in the string, as well as at the very end, when
PCRE_MULTILINE is set. When newline is specified as the two-character
sequence CRLF, isolated CR and LF characters do not indicate newlines.
</P>
<P>
For example, the pattern /^abc$/ matches the subject string "def\nabc" (where
\n represents a newline) in multiline mode, but not otherwise. Consequently,
patterns that are anchored in single line mode because all branches start with
^ are not anchored in multiline mode, and a match for circumflex is possible
when the <i>startoffset</i> argument of <b>pcre_exec()</b> is non-zero. The
PCRE_DOLLAR_ENDONLY option is ignored if PCRE_MULTILINE is set.
</P>
<P>
Note that the sequences \A, \Z, and \z can be used to match the start and
end of the subject in both modes, and if all branches of a pattern start with
\A it is always anchored, whether or not PCRE_MULTILINE is set.
</P>
<br><a name="SEC6" href="#TOC1">FULL STOP (PERIOD, DOT)</a><br>
<P>
Outside a character class, a dot in the pattern matches any one character in
the subject string except (by default) a character that signifies the end of a
line. In UTF-8 mode, the matched character may be more than one byte long.
</P>
<P>
When a line ending is defined as a single character, dot never matches that
character; when the two-character sequence CRLF is used, dot does not match CR
if it is immediately followed by LF, but otherwise it matches all characters
(including isolated CRs and LFs). When any Unicode line endings are being
recognized, dot does not match CR or LF or any of the other line ending
characters.
</P>
<P>
The behaviour of dot with regard to newlines can be changed. If the PCRE_DOTALL
option is set, a dot matches any one character, without exception. If the
two-character sequence CRLF is present in the subject string, it takes two dots
to match it.
</P>
<P>
The handling of dot is entirely independent of the handling of circumflex and
dollar, the only relationship being that they both involve newlines. Dot has no
special meaning in a character class.
</P>
<br><a name="SEC7" href="#TOC1">MATCHING A SINGLE BYTE</a><br>
<P>
Outside a character class, the escape sequence \C matches any one byte, both
in and out of UTF-8 mode. Unlike a dot, it always matches any line-ending
characters. The feature is provided in Perl in order to match individual bytes
in UTF-8 mode. Because it breaks up UTF-8 characters into individual bytes,
what remains in the string may be a malformed UTF-8 string. For this reason,
the \C escape sequence is best avoided.
</P>
<P>
PCRE does not allow \C to appear in lookbehind assertions
<a href="#lookbehind">(described below),</a>
because in UTF-8 mode this would make it impossible to calculate the length of
the lookbehind.
<a name="characterclass"></a></P>
<br><a name="SEC8" href="#TOC1">SQUARE BRACKETS AND CHARACTER CLASSES</a><br>
<P>
An opening square bracket introduces a character class, terminated by a closing
square bracket. A closing square bracket on its own is not special by default.
However, if the PCRE_JAVASCRIPT_COMPAT option is set, a lone closing square
bracket causes a compile-time error. If a closing square bracket is required as
a member of the class, it should be the first data character in the class
(after an initial circumflex, if present) or escaped with a backslash.
</P>
<P>
A character class matches a single character in the subject. In UTF-8 mode, the
character may be more than one byte long. A matched character must be in the
set of characters defined by the class, unless the first character in the class
definition is a circumflex, in which case the subject character must not be in
the set defined by the class. If a circumflex is actually required as a member
of the class, ensure it is not the first character, or escape it with a
backslash.
</P>
<P>
For example, the character class [aeiou] matches any lower case vowel, while
[^aeiou] matches any character that is not a lower case vowel. Note that a
circumflex is just a convenient notation for specifying the characters that
are in the class by enumerating those that are not. A class that starts with a
circumflex is not an assertion; it still consumes a character from the subject
string, and therefore it fails if the current pointer is at the end of the
string.
</P>
<P>
In UTF-8 mode, characters with values greater than 255 can be included in a
class as a literal string of bytes, or by using the \x{ escaping mechanism.
</P>
<P>
When caseless matching is set, any letters in a class represent both their
upper case and lower case versions, so for example, a caseless [aeiou] matches
"A" as well as "a", and a caseless [^aeiou] does not match "A", whereas a
caseful version would. In UTF-8 mode, PCRE always understands the concept of
case for characters whose values are less than 128, so caseless matching is
always possible. For characters with higher values, the concept of case is
supported if PCRE is compiled with Unicode property support, but not otherwise.
If you want to use caseless matching in UTF-8 mode for characters 128 and above,
you must ensure that PCRE is compiled with Unicode property support as well as
with UTF-8 support.
</P>
<P>
Characters that might indicate line breaks are never treated in any special way
when matching character classes, whatever line-ending sequence is in use, and
whatever setting of the PCRE_DOTALL and PCRE_MULTILINE options is used. A class
such as [^a] always matches one of these characters.
</P>
<P>
The minus (hyphen) character can be used to specify a range of characters in a
character class. For example, [d-m] matches any letter between d and m,
inclusive. If a minus character is required in a class, it must be escaped with
a backslash or appear in a position where it cannot be interpreted as
indicating a range, typically as the first or last character in the class.
</P>
<P>
It is not possible to have the literal character "]" as the end character of a
range. A pattern such as [W-]46] is interpreted as a class of two characters
("W" and "-") followed by a literal string "46]", so it would match "W46]" or
"-46]". However, if the "]" is escaped with a backslash it is interpreted as
the end of range, so [W-\]46] is interpreted as a class containing a range
followed by two other characters. The octal or hexadecimal representation of
"]" can also be used to end a range.
</P>
<P>
Ranges operate in the collating sequence of character values. They can also be
used for characters specified numerically, for example [\000-\037]. In UTF-8
mode, ranges can include characters whose values are greater than 255, for
example [\x{100}-\x{2ff}].
</P>
<P>
If a range that includes letters is used when caseless matching is set, it
matches the letters in either case. For example, [W-c] is equivalent to
[][\\^_`wxyzabc], matched caselessly, and in non-UTF-8 mode, if character
tables for a French locale are in use, [\xc8-\xcb] matches accented E
characters in both cases. In UTF-8 mode, PCRE supports the concept of case for
characters with values greater than 127 only when it is compiled with Unicode
property support.
</P>
<P>
The character types \d, \D, \p, \P, \s, \S, \w, and \W may also appear
in a character class, and add the characters that they match to the class. For
example, [\dABCDEF] matches any hexadecimal digit. A circumflex can
conveniently be used with the upper case character types to specify a more
restricted set of characters than the matching lower case type. For example,
the class [^\W_] matches any letter or digit, but not underscore.
</P>
<P>
The only metacharacters that are recognized in character classes are backslash,
hyphen (only where it can be interpreted as specifying a range), circumflex
(only at the start), opening square bracket (only when it can be interpreted as
introducing a POSIX class name - see the next section), and the terminating
closing square bracket. However, escaping other non-alphanumeric characters
does no harm.
</P>
<br><a name="SEC9" href="#TOC1">POSIX CHARACTER CLASSES</a><br>
<P>
Perl supports the POSIX notation for character classes. This uses names
enclosed by [: and :] within the enclosing square brackets. PCRE also supports
this notation. For example,
<pre>
  [01[:alpha:]%]
</pre>
matches "0", "1", any alphabetic character, or "%". The supported class names
are
<pre>
  alnum    letters and digits
  alpha    letters
  ascii    character codes 0 - 127
  blank    space or tab only
  cntrl    control characters
  digit    decimal digits (same as \d)
  graph    printing characters, excluding space
  lower    lower case letters
  print    printing characters, including space
  punct    printing characters, excluding letters and digits
  space    white space (not quite the same as \s)
  upper    upper case letters
  word     "word" characters (same as \w)
  xdigit   hexadecimal digits
</pre>
The "space" characters are HT (9), LF (10), VT (11), FF (12), CR (13), and
space (32). Notice that this list includes the VT character (code 11). This
makes "space" different to \s, which does not include VT (for Perl
compatibility).
</P>
<P>
The name "word" is a Perl extension, and "blank" is a GNU extension from Perl
5.8. Another Perl extension is negation, which is indicated by a ^ character
after the colon. For example,
<pre>
  [12[:^digit:]]
</pre>
matches "1", "2", or any non-digit. PCRE (and Perl) also recognize the POSIX
syntax [.ch.] and [=ch=] where "ch" is a "collating element", but these are not
supported, and an error is given if they are encountered.
</P>
<P>
In UTF-8 mode, characters with values greater than 127 do not match any of
the POSIX character classes.
</P>
<br><a name="SEC10" href="#TOC1">VERTICAL BAR</a><br>
<P>
Vertical bar characters are used to separate alternative patterns. For example,
the pattern
<pre>
  gilbert|sullivan
</pre>
matches either "gilbert" or "sullivan". Any number of alternatives may appear,
and an empty alternative is permitted (matching the empty string). The matching
process tries each alternative in turn, from left to right, and the first one
that succeeds is used. If the alternatives are within a subpattern
<a href="#subpattern">(defined below),</a>
"succeeds" means matching the rest of the main pattern as well as the
alternative in the subpattern.
</P>
<br><a name="SEC11" href="#TOC1">INTERNAL OPTION SETTING</a><br>
<P>
The settings of the PCRE_CASELESS, PCRE_MULTILINE, PCRE_DOTALL, and
PCRE_EXTENDED options (which are Perl-compatible) can be changed from within
the pattern by a sequence of Perl option letters enclosed between "(?" and ")".
The option letters are
<pre>
  i  for PCRE_CASELESS
  m  for PCRE_MULTILINE
  s  for PCRE_DOTALL
  x  for PCRE_EXTENDED
</pre>
For example, (?im) sets caseless, multiline matching. It is also possible to
unset these options by preceding the letter with a hyphen, and a combined
setting and unsetting such as (?im-sx), which sets PCRE_CASELESS and
PCRE_MULTILINE while unsetting PCRE_DOTALL and PCRE_EXTENDED, is also
permitted. If a letter appears both before and after the hyphen, the option is
unset.
</P>
<P>
The PCRE-specific options PCRE_DUPNAMES, PCRE_UNGREEDY, and PCRE_EXTRA can be
changed in the same way as the Perl-compatible options by using the characters
J, U and X respectively.
</P>
<P>
When one of these option changes occurs at top level (that is, not inside
subpattern parentheses), the change applies to the remainder of the pattern
that follows. If the change is placed right at the start of a pattern, PCRE
extracts it into the global options (and it will therefore show up in data
extracted by the <b>pcre_fullinfo()</b> function).
</P>
<P>
An option change within a subpattern (see below for a description of
subpatterns) affects only that part of the current pattern that follows it, so
<pre>
  (a(?i)b)c
</pre>
matches abc and aBc and no other strings (assuming PCRE_CASELESS is not used).
By this means, options can be made to have different settings in different
parts of the pattern. Any changes made in one alternative do carry on
into subsequent branches within the same subpattern. For example,
<pre>
  (a(?i)b|c)
</pre>
matches "ab", "aB", "c", and "C", even though when matching "C" the first
branch is abandoned before the option setting. This is because the effects of
option settings happen at compile time. There would be some very weird
behaviour otherwise.
</P>
<P>
<b>Note:</b> There are other PCRE-specific options that can be set by the
application when the compile or match functions are called. In some cases the
pattern can contain special leading sequences such as (*CRLF) to override what
the application has set or what has been defaulted. Details are given in the
section entitled
<a href="#newlineseq">"Newline sequences"</a>
above. There is also the (*UTF8) leading sequence that can be used to set UTF-8
mode; this is equivalent to setting the PCRE_UTF8 option.
<a name="subpattern"></a></P>
<br><a name="SEC12" href="#TOC1">SUBPATTERNS</a><br>
<P>
Subpatterns are delimited by parentheses (round brackets), which can be nested.
Turning part of a pattern into a subpattern does two things:
<br>
<br>
1. It localizes a set of alternatives. For example, the pattern
<pre>
  cat(aract|erpillar|)
</pre>
matches one of the words "cat", "cataract", or "caterpillar". Without the
parentheses, it would match "cataract", "erpillar" or an empty string.
<br>
<br>
2. It sets up the subpattern as a capturing subpattern. This means that, when
the whole pattern matches, that portion of the subject string that matched the
subpattern is passed back to the caller via the <i>ovector</i> argument of
<b>pcre_exec()</b>. Opening parentheses are counted from left to right (starting
from 1) to obtain numbers for the capturing subpatterns.
</P>
<P>
For example, if the string "the red king" is matched against the pattern
<pre>
  the ((red|white) (king|queen))
</pre>
the captured substrings are "red king", "red", and "king", and are numbered 1,
2, and 3, respectively.
</P>
<P>
The fact that plain parentheses fulfil two functions is not always helpful.
There are often times when a grouping subpattern is required without a
capturing requirement. If an opening parenthesis is followed by a question mark
and a colon, the subpattern does not do any capturing, and is not counted when
computing the number of any subsequent capturing subpatterns. For example, if
the string "the white queen" is matched against the pattern
<pre>
  the ((?:red|white) (king|queen))
</pre>
the captured substrings are "white queen" and "queen", and are numbered 1 and
2. The maximum number of capturing subpatterns is 65535.
</P>
<P>
As a convenient shorthand, if any option settings are required at the start of
a non-capturing subpattern, the option letters may appear between the "?" and
the ":". Thus the two patterns
<pre>
  (?i:saturday|sunday)
  (?:(?i)saturday|sunday)
</pre>
match exactly the same set of strings. Because alternative branches are tried
from left to right, and options are not reset until the end of the subpattern
is reached, an option setting in one branch does affect subsequent branches, so
the above patterns match "SUNDAY" as well as "Saturday".
<a name="dupsubpatternnumber"></a></P>
<br><a name="SEC13" href="#TOC1">DUPLICATE SUBPATTERN NUMBERS</a><br>
<P>
Perl 5.10 introduced a feature whereby each alternative in a subpattern uses
the same numbers for its capturing parentheses. Such a subpattern starts with
(?| and is itself a non-capturing subpattern. For example, consider this
pattern:
<pre>
  (?|(Sat)ur|(Sun))day
</pre>
Because the two alternatives are inside a (?| group, both sets of capturing
parentheses are numbered one. Thus, when the pattern matches, you can look
at captured substring number one, whichever alternative matched. This construct
is useful when you want to capture part, but not all, of one of a number of
alternatives. Inside a (?| group, parentheses are numbered as usual, but the
number is reset at the start of each branch. The numbers of any capturing
buffers that follow the subpattern start after the highest number used in any
branch. The following example is taken from the Perl documentation.
The numbers underneath show in which buffer the captured content will be
stored.
<pre>
  # before  ---------------branch-reset----------- after
  / ( a )  (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
  # 1            2         2  3        2     3     4
</pre>
A back reference to a numbered subpattern uses the most recent value that is
set for that number by any subpattern. The following pattern matches "abcabc"
or "defdef":
<pre>
  /(?|(abc)|(def))\1/
</pre>
In contrast, a recursive or "subroutine" call to a numbered subpattern always
refers to the first one in the pattern with the given number. The following
pattern matches "abcabc" or "defabc":
<pre>
  /(?|(abc)|(def))(?1)/
</pre>
If a
<a href="#conditions">condition test</a>
for a subpattern's having matched refers to a non-unique number, the test is
true if any of the subpatterns of that number have matched.
</P>
<P>
An alternative approach to using this "branch reset" feature is to use
duplicate named subpatterns, as described in the next section.
</P>
<br><a name="SEC14" href="#TOC1">NAMED SUBPATTERNS</a><br>
<P>
Identifying capturing parentheses by number is simple, but it can be very hard
to keep track of the numbers in complicated regular expressions. Furthermore,
if an expression is modified, the numbers may change. To help with this
difficulty, PCRE supports the naming of subpatterns. This feature was not
added to Perl until release 5.10. Python had the feature earlier, and PCRE
introduced it at release 4.0, using the Python syntax. PCRE now supports both
the Perl and the Python syntax. Perl allows identically numbered subpatterns to
have different names, but PCRE does not.
</P>
<P>
In PCRE, a subpattern can be named in one of three ways: (?&#60;name&#62;...) or
(?'name'...) as in Perl, or (?P&#60;name&#62;...) as in Python. References to capturing
parentheses from other parts of the pattern, such as
<a href="#backreferences">back references,</a>
<a href="#recursion">recursion,</a>
and
<a href="#conditions">conditions,</a>
can be made by name as well as by number.
</P>
<P>
Names consist of up to 32 alphanumeric characters and underscores. Named
capturing parentheses are still allocated numbers as well as names, exactly as
if the names were not present. The PCRE API provides function calls for
extracting the name-to-number translation table from a compiled pattern. There
is also a convenience function for extracting a captured substring by name.
</P>
<P>
By default, a name must be unique within a pattern, but it is possible to relax
this constraint by setting the PCRE_DUPNAMES option at compile time. (Duplicate
names are also always permitted for subpatterns with the same number, set up as
described in the previous section.) Duplicate names can be useful for patterns
where only one instance of the named parentheses can match. Suppose you want to
match the name of a weekday, either as a 3-letter abbreviation or as the full
name, and in both cases you want to extract the abbreviation. This pattern
(ignoring the line breaks) does the job:
<pre>
  (?&#60;DN&#62;Mon|Fri|Sun)(?:day)?|
  (?&#60;DN&#62;Tue)(?:sday)?|
  (?&#60;DN&#62;Wed)(?:nesday)?|
  (?&#60;DN&#62;Thu)(?:rsday)?|
  (?&#60;DN&#62;Sat)(?:urday)?
</pre>
There are five capturing substrings, but only one is ever set after a match.
(An alternative way of solving this problem is to use a "branch reset"
subpattern, as described in the previous section.)
</P>
<P>
The convenience function for extracting the data by name returns the substring
for the first (and in this example, the only) subpattern of that name that
matched. This saves searching to find which numbered subpattern it was.
</P>
<P>
If you make a back reference to a non-unique named subpattern from elsewhere in
the pattern, the one that corresponds to the first occurrence of the name is
used. In the absence of duplicate numbers (see the previous section) this is
the one with the lowest number. If you use a named reference in a condition
test (see the
<a href="#conditions">section about conditions</a>
below), either to check whether a subpattern has matched, or to check for
recursion, all subpatterns with the same name are tested. If the condition is
true for any one of them, the overall condition is true. This is the same
behaviour as testing by number. For further details of the interfaces for
handling named subpatterns, see the
<a href="pcreapi.html"><b>pcreapi</b></a>
documentation.
</P>
<P>
<b>Warning:</b> You cannot use different names to distinguish between two
subpatterns with the same number because PCRE uses only the numbers when
matching. For this reason, an error is given at compile time if different names
are given to subpatterns with the same number. However, you can give the same
name to subpatterns with the same number, even when PCRE_DUPNAMES is not set.
</P>
<br><a name="SEC15" href="#TOC1">REPETITION</a><br>
<P>
Repetition is specified by quantifiers, which can follow any of the following
items:
<pre>
  a literal data character
  the dot metacharacter
  the \C escape sequence
  the \X escape sequence (in UTF-8 mode with Unicode properties)
  the \R escape sequence
  an escape such as \d that matches a single character
  a character class
  a back reference (see next section)
  a parenthesized subpattern (unless it is an assertion)
  a recursive or "subroutine" call to a subpattern
</pre>
The general repetition quantifier specifies a minimum and maximum number of
permitted matches, by giving the two numbers in curly brackets (braces),
separated by a comma. The numbers must be less than 65536, and the first must
be less than or equal to the second. For example:
<pre>
  z{2,4}
</pre>
matches "zz", "zzz", or "zzzz". A closing brace on its own is not a special
character. If the second number is omitted, but the comma is present, there is
no upper limit; if the second number and the comma are both omitted, the
quantifier specifies an exact number of required matches. Thus
<pre>
  [aeiou]{3,}
</pre>
matches at least 3 successive vowels, but may match many more, while
<pre>
  \d{8}
</pre>
matches exactly 8 digits. An opening curly bracket that appears in a position
where a quantifier is not allowed, or one that does not match the syntax of a
quantifier, is taken as a literal character. For example, {,6} is not a
quantifier, but a literal string of four characters.
</P>
<P>
In UTF-8 mode, quantifiers apply to UTF-8 characters rather than to individual
bytes. Thus, for example, \x{100}{2} matches two UTF-8 characters, each of
which is represented by a two-byte sequence. Similarly, when Unicode property
support is available, \X{3} matches three Unicode extended sequences, each of
which may be several bytes long (and they may be of different lengths).
</P>
<P>
The quantifier {0} is permitted, causing the expression to behave as if the
previous item and the quantifier were not present. This may be useful for
subpatterns that are referenced as
<a href="#subpatternsassubroutines">subroutines</a>
from elsewhere in the pattern. Items other than subpatterns that have a {0}
quantifier are omitted from the compiled pattern.
</P>
<P>
For convenience, the three most common quantifiers have single-character
abbreviations:
<pre>
  *    is equivalent to {0,}
  +    is equivalent to {1,}
  ?    is equivalent to {0,1}
</pre>
It is possible to construct infinite loops by following a subpattern that can
match no characters with a quantifier that has no upper limit, for example:
<pre>
  (a?)*
</pre>
Earlier versions of Perl and PCRE used to give an error at compile time for
such patterns. However, because there are cases where this can be useful, such
patterns are now accepted, but if any repetition of the subpattern does in fact
match no characters, the loop is forcibly broken.
</P>
<P>
By default, the quantifiers are "greedy", that is, they match as much as
possible (up to the maximum number of permitted times), without causing the
rest of the pattern to fail. The classic example of where this gives problems
is in trying to match comments in C programs. These appear between /* and */
and within the comment, individual * and / characters may appear. An attempt to
match C comments by applying the pattern
<pre>
  /\*.*\*/
</pre>
to the string
<pre>
  /* first comment */  not comment  /* second comment */
</pre>
fails, because it matches the entire string owing to the greediness of the .*
item.
</P>
<P>
However, if a quantifier is followed by a question mark, it ceases to be
greedy, and instead matches the minimum number of times possible, so the
pattern
<pre>
  /\*.*?\*/
</pre>
does the right thing with the C comments. The meaning of the various
quantifiers is not otherwise changed, just the preferred number of matches.
Do not confuse this use of question mark with its use as a quantifier in its
own right. Because it has two uses, it can sometimes appear doubled, as in
<pre>
  \d??\d
</pre>
which matches one digit by preference, but can match two if that is the only
way the rest of the pattern matches.
</P>
<P>
If the PCRE_UNGREEDY option is set (an option that is not available in Perl),
the quantifiers are not greedy by default, but individual ones can be made
greedy by following them with a question mark. In other words, it inverts the
default behaviour.
</P>
<P>
When a parenthesized subpattern is quantified with a minimum repeat count that
is greater than 1 or with a limited maximum, more memory is required for the
compiled pattern, in proportion to the size of the minimum or maximum.
</P>
<P>
If a pattern starts with .* or .{0,} and the PCRE_DOTALL option (equivalent
to Perl's /s) is set, thus allowing the dot to match newlines, the pattern is
implicitly anchored, because whatever follows will be tried against every
character position in the subject string, so there is no point in retrying the
overall match at any position after the first. PCRE normally treats such a
pattern as though it were preceded by \A.
</P>
<P>
In cases where it is known that the subject string contains no newlines, it is
worth setting PCRE_DOTALL in order to obtain this optimization, or
alternatively using ^ to indicate anchoring explicitly.
</P>
<P>
However, there is one situation where the optimization cannot be used. When .*
is inside capturing parentheses that are the subject of a back reference
elsewhere in the pattern, a match at the start may fail where a later one
succeeds. Consider, for example:
<pre>
  (.*)abc\1
</pre>
If the subject is "xyz123abc123" the match point is the fourth character. For
this reason, such a pattern is not implicitly anchored.
</P>
<P>
When a capturing subpattern is repeated, the value captured is the substring
that matched the final iteration. For example, after
<pre>
  (tweedle[dume]{3}\s*)+
</pre>
has matched "tweedledum tweedledee" the value of the captured substring is
"tweedledee". However, if there are nested capturing subpatterns, the
corresponding captured values may have been set in previous iterations. For
example, after
<pre>
  /(a|(b))+/
</pre>
matches "aba" the value of the second captured substring is "b".
<a name="atomicgroup"></a></P>
<br><a name="SEC16" href="#TOC1">ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS</a><br>
<P>
With both maximizing ("greedy") and minimizing ("ungreedy" or "lazy")
repetition, failure of what follows normally causes the repeated item to be
re-evaluated to see if a different number of repeats allows the rest of the
pattern to match. Sometimes it is useful to prevent this, either to change the
nature of the match, or to cause it to fail earlier than it otherwise might, when
the author of the pattern knows there is no point in carrying on.
</P>
<P>
Consider, for example, the pattern \d+foo when applied to the subject line
<pre>
  123456bar
</pre>
After matching all 6 digits and then failing to match "foo", the normal
action of the matcher is to try again with only 5 digits matching the \d+
item, and then with 4, and so on, before ultimately failing. "Atomic grouping"
(a term taken from Jeffrey Friedl's book) provides the means for specifying
that once a subpattern has matched, it is not to be re-evaluated in this way.
</P>
<P>
If we use atomic grouping for the previous example, the matcher gives up
immediately on failing to match "foo" the first time. The notation is a kind of
special parenthesis, starting with (?&#62; as in this example:
<pre>
  (?&#62;\d+)foo
</pre>
This kind of parenthesis "locks up" the part of the pattern it contains once
it has matched, and a failure further into the pattern is prevented from
backtracking into it. Backtracking past it to previous items, however, works as
normal.
</P>
<P>
An alternative description is that a subpattern of this type matches the string
of characters that an identical standalone pattern would match, if anchored at
the current point in the subject string.
</P>
<P>
Atomic grouping subpatterns are not capturing subpatterns. Simple cases such as
the above example can be thought of as a maximizing repeat that must swallow
everything it can. So, while both \d+ and \d+? are prepared to adjust the
number of digits they match in order to make the rest of the pattern match,
(?&#62;\d+) can only match an entire sequence of digits.
</P>
<P>
Atomic groups in general can of course contain arbitrarily complicated
subpatterns, and can be nested. However, when the subpattern for an atomic
group is just a single repeated item, as in the example above, a simpler
notation, called a "possessive quantifier", can be used. This consists of an
additional + character following a quantifier. Using this notation, the
previous example can be rewritten as
<pre>
  \d++foo
</pre>
Note that a possessive quantifier can be used with an entire group, for
example:
<pre>
  (abc|xyz){2,3}+
</pre>
Possessive quantifiers are always greedy; the setting of the PCRE_UNGREEDY
option is ignored. They are a convenient notation for the simpler forms of
atomic group. However, there is no difference in the meaning of a possessive
quantifier and the equivalent atomic group, though there may be a performance
difference; possessive quantifiers should be slightly faster.
</P>
<P>
The possessive quantifier syntax is an extension to the Perl 5.8 syntax.
Jeffrey Friedl originated the idea (and the name) in the first edition of his
book. Mike McCloskey liked it, so implemented it when he built Sun's Java
package, and PCRE copied it from there. It ultimately found its way into Perl
at release 5.10.
</P>
<P>
PCRE has an optimization that automatically "possessifies" certain simple
pattern constructs. For example, the sequence A+B is treated as A++B because
there is no point in backtracking into a sequence of A's when B must follow.
</P>
<P>
When a pattern contains an unlimited repeat inside a subpattern that can itself
be repeated an unlimited number of times, the use of an atomic group is the
only way to avoid some failing matches taking a very long time indeed. The
pattern
<pre>
  (\D+|&#60;\d+&#62;)*[!?]
</pre>
matches an unlimited number of substrings that either consist of non-digits, or
digits enclosed in &#60;&#62;, followed by either ! or ?. When it matches, it runs
quickly. However, if it is applied to
<pre>
  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
</pre>
it takes a long time before reporting failure. This is because the string can
be divided between the internal \D+ repeat and the external * repeat in a
large number of ways, and all have to be tried. (The example uses [!?] rather
than a single character at the end, because both PCRE and Perl have an
optimization that allows for fast failure when a single character is used. They
remember the last single character that is required for a match, and fail early
if it is not present in the string.) If the pattern is changed so that it uses
an atomic group, like this:
<pre>
  ((?&#62;\D+)|&#60;\d+&#62;)*[!?]
</pre>
sequences of non-digits cannot be broken, and failure happens quickly.
<a name="backreferences"></a></P>
<br><a name="SEC17" href="#TOC1">BACK REFERENCES</a><br>
<P>
Outside a character class, a backslash followed by a digit greater than 0 (and
possibly further digits) is a back reference to a capturing subpattern earlier
(that is, to its left) in the pattern, provided there have been that many
previous capturing left parentheses.
</P>
<P>
However, if the decimal number following the backslash is less than 10, it is
always taken as a back reference, and causes an error only if there are not
that many capturing left parentheses in the entire pattern. In other words, the
parentheses that are referenced need not be to the left of the reference for
numbers less than 10. A "forward back reference" of this type can make sense
when a repetition is involved and the subpattern to the right has participated
in an earlier iteration.
</P>
<P>
It is not possible to have a numerical "forward back reference" to a subpattern
whose number is 10 or more using this syntax because a sequence such as \50 is
interpreted as a character defined in octal. See the subsection entitled
"Non-printing characters"
<a href="#digitsafterbackslash">above</a>
for further details of the handling of digits following a backslash. There is
no such problem when named parentheses are used. A back reference to any
subpattern is possible using named parentheses (see below).
</P>
<P>
Another way of avoiding the ambiguity inherent in the use of digits following a
backslash is to use the \g escape sequence, which is a feature introduced in
Perl 5.10. This escape must be followed by an unsigned number or a negative
number, optionally enclosed in braces. These examples are all identical:
<pre>
  (ring), \1
  (ring), \g1
  (ring), \g{1}
</pre>
An unsigned number specifies an absolute reference without the ambiguity that
is present in the older syntax. It is also useful when literal digits follow
the reference. A negative number is a relative reference. Consider this
example:
<pre>
  (abc(def)ghi)\g{-1}
</pre>
The sequence \g{-1} is a reference to the most recently started capturing
subpattern before \g, that is, it is equivalent to \2. Similarly, \g{-2}
would be equivalent to \1. The use of relative references can be helpful in
long patterns, and also in patterns that are created by joining together
fragments that contain references within themselves.
</P>
<P>
A back reference matches whatever actually matched the capturing subpattern in
the current subject string, rather than anything matching the subpattern
itself (see
<a href="#subpatternsassubroutines">"Subpatterns as subroutines"</a>
below for a way of doing that). So the pattern
<pre>
  (sens|respons)e and \1ibility
</pre>
matches "sense and sensibility" and "response and responsibility", but not
"sense and responsibility". If caseful matching is in force at the time of the
back reference, the case of letters is relevant. For example,
<pre>
  ((?i)rah)\s+\1
</pre>
matches "rah rah" and "RAH RAH", but not "RAH rah", even though the original
capturing subpattern is matched caselessly.
</P>
<P>
There are several different ways of writing back references to named
subpatterns. The .NET syntax \k{name} and the Perl syntax \k&#60;name&#62; or
\k'name' are supported, as is the Python syntax (?P=name). Perl 5.10's unified
back reference syntax, in which \g can be used for both numeric and named
references, is also supported. We could rewrite the above example in any of
the following ways:
<pre>
  (?&#60;p1&#62;(?i)rah)\s+\k&#60;p1&#62;
  (?'p1'(?i)rah)\s+\k{p1}
  (?P&#60;p1&#62;(?i)rah)\s+(?P=p1)
  (?&#60;p1&#62;(?i)rah)\s+\g{p1}
</pre>
A subpattern that is referenced by name may appear in the pattern before or
after the reference.
</P>
<P>
There may be more than one back reference to the same subpattern. If a
subpattern has not actually been used in a particular match, any back
references to it always fail by default. For example, the pattern
<pre>
  (a|(bc))\2
</pre>
always fails if it starts to match "a" rather than "bc". However, if the
PCRE_JAVASCRIPT_COMPAT option is set at compile time, a back reference to an
unset value matches an empty string.
</P>
<P>
Because there may be many capturing parentheses in a pattern, all digits
following a backslash are taken as part of a potential back reference number.
If the pattern continues with a digit character, some delimiter must be used to
terminate the back reference. If the PCRE_EXTENDED option is set, this can be
whitespace. Otherwise, the \g{ syntax or an empty comment (see
<a href="#comments">"Comments"</a>
below) can be used.
</P>
<br><b>
Recursive back references
</b><br>
<P>
A back reference that occurs inside the parentheses to which it refers fails
when the subpattern is first used, so, for example, (a\1) never matches.
However, such references can be useful inside repeated subpatterns. For
example, the pattern
<pre>
  (a|b\1)+
</pre>
matches any number of "a"s and also "aba", "ababbaa" etc. At each iteration of
the subpattern, the back reference matches the character string corresponding
to the previous iteration. In order for this to work, the pattern must be such
that the first iteration does not need to match the back reference. This can be
done using alternation, as in the example above, or by a quantifier with a
minimum of zero.
</P>
<P>
Back references of this type cause the group that they reference to be treated
as an
<a href="#atomicgroup">atomic group.</a>
Once the whole group has been matched, a subsequent matching failure cannot
cause backtracking into the middle of the group.
<a name="bigassertions"></a></P>
<br><a name="SEC18" href="#TOC1">ASSERTIONS</a><br>
<P>
An assertion is a test on the characters following or preceding the current
matching point that does not actually consume any characters. The simple
assertions coded as \b, \B, \A, \G, \Z, \z, ^ and $ are described
<a href="#smallassertions">above.</a>
</P>
<P>
More complicated assertions are coded as subpatterns. There are two kinds:
those that look ahead of the current position in the subject string, and those
that look behind it. An assertion subpattern is matched in the normal way,
except that it does not cause the current matching position to be changed.
</P>
<P>
Assertion subpatterns are not capturing subpatterns, and may not be repeated,
because it makes no sense to assert the same thing several times. If any kind
of assertion contains capturing subpatterns within it, these are counted for
the purposes of numbering the capturing subpatterns in the whole pattern.
However, substring capturing is carried out only for positive assertions,
because it does not make sense for negative assertions.
</P>
<br><b>
Lookahead assertions
</b><br>
<P>
Lookahead assertions start with (?= for positive assertions and (?! for
negative assertions. For example,
<pre>
  \w+(?=;)
</pre>
matches a word followed by a semicolon, but does not include the semicolon in
the match, and
<pre>
  foo(?!bar)
</pre>
matches any occurrence of "foo" that is not followed by "bar". Note that the
apparently similar pattern
<pre>
  (?!foo)bar
</pre>
does not find an occurrence of "bar" that is preceded by something other than
"foo"; it finds any occurrence of "bar" whatsoever, because the assertion
(?!foo) is always true when the next three characters are "bar". A
lookbehind assertion is needed to achieve the other effect.
</P>
<P>
If you want to force a matching failure at some point in a pattern, the most
convenient way to do it is with (?!) because an empty string always matches, so
an assertion that requires there not to be an empty string must always fail.
The Perl 5.10 backtracking control verb (*FAIL) or (*F) is essentially a
synonym for (?!).
<a name="lookbehind"></a></P>
<br><b>
Lookbehind assertions
</b><br>
<P>
Lookbehind assertions start with (?&#60;= for positive assertions and (?&#60;! for
negative assertions. For example,
<pre>
  (?&#60;!foo)bar
</pre>
does find an occurrence of "bar" that is not preceded by "foo". The contents of
a lookbehind assertion are restricted such that all the strings it matches must
have a fixed length. However, if there are several top-level alternatives, they
do not all have to have the same fixed length. Thus
<pre>
  (?&#60;=bullock|donkey)
</pre>
is permitted, but
<pre>
  (?&#60;!dogs?|cats?)
</pre>
causes an error at compile time. Branches that match different length strings
are permitted only at the top level of a lookbehind assertion. This is an
extension compared with Perl (5.8 and 5.10), which requires all branches to
match the same length of string. An assertion such as
<pre>
  (?&#60;=ab(c|de))
</pre>
is not permitted, because its single top-level branch can match two different
lengths, but it is acceptable to PCRE if rewritten to use two top-level
branches:
<pre>
  (?&#60;=abc|abde)
</pre>
In some cases, the Perl 5.10 escape sequence \K
<a href="#resetmatchstart">(see above)</a>
can be used instead of a lookbehind assertion to get round the fixed-length
restriction.
</P>
<P>
The implementation of lookbehind assertions is, for each alternative, to
temporarily move the current position back by the fixed length and then try to
match. If there are insufficient characters before the current position, the
assertion fails.
</P>
<P>
PCRE does not allow the \C escape (which matches a single byte in UTF-8 mode)
to appear in lookbehind assertions, because it makes it impossible to calculate
the length of the lookbehind. The \X and \R escapes, which can match
different numbers of bytes, are also not permitted.
</P>
<P>
<a href="#subpatternsassubroutines">"Subroutine"</a>
calls (see below) such as (?2) or (?&X) are permitted in lookbehinds, as long
as the subpattern matches a fixed-length string.
<a href="#recursion">Recursion,</a>
however, is not supported.
</P>
<P>
Possessive quantifiers can be used in conjunction with lookbehind assertions to
specify efficient matching of fixed-length strings at the end of subject
strings. Consider a simple pattern such as
<pre>
  abcd$
</pre>
when applied to a long string that does not match. Because matching proceeds
from left to right, PCRE will look for each "a" in the subject and then see if
what follows matches the rest of the pattern. If the pattern is specified as
<pre>
  ^.*abcd$
</pre>
the initial .* matches the entire string at first, but when this fails (because
there is no following "a"), it backtracks to match all but the last character,
then all but the last two characters, and so on. Once again the search for "a"
covers the entire string, from right to left, so we are no better off. However,
if the pattern is written as
<pre>
  ^.*+(?&#60;=abcd)
</pre>
there can be no backtracking for the .*+ item; it can match only the entire
string. The subsequent lookbehind assertion does a single test on the last four
characters. If it fails, the match fails immediately. For long strings, this
approach makes a significant difference to the processing time.
</P>
<br><b>
Using multiple assertions
</b><br>
<P>
Several assertions (of any sort) may occur in succession. For example,
<pre>
  (?&#60;=\d{3})(?&#60;!999)foo
</pre>
matches "foo" preceded by three digits that are not "999". Notice that each of
the assertions is applied independently at the same point in the subject
string. First there is a check that the previous three characters are all
digits, and then there is a check that the same three characters are not "999".
This pattern does <i>not</i> match "foo" preceded by six characters, the first
three of which are digits and the last three of which are not "999". For example, it
doesn't match "123abcfoo". A pattern to do that is
<pre>
  (?&#60;=\d{3}...)(?&#60;!999)foo
</pre>
This time the first assertion looks at the preceding six characters, checking
that the first three are digits, and then the second assertion checks that the
preceding three characters are not "999".
</P>
<P>
Assertions can be nested in any combination. For example,
<pre>
  (?&#60;=(?&#60;!foo)bar)baz
</pre>
matches an occurrence of "baz" that is preceded by "bar" which in turn is not
preceded by "foo", while
<pre>
  (?&#60;=\d{3}(?!999)...)foo
</pre>
is another pattern that matches "foo" preceded by three digits and any three
characters that are not "999".
<a name="conditions"></a></P>
<br><a name="SEC19" href="#TOC1">CONDITIONAL SUBPATTERNS</a><br>
<P>
It is possible to cause the matching process to obey a subpattern
conditionally or to choose between two alternative subpatterns, depending on
the result of an assertion, or whether a specific capturing subpattern has
already been matched. The two possible forms of conditional subpattern are:
<pre>
  (?(condition)yes-pattern)
  (?(condition)yes-pattern|no-pattern)
</pre>
If the condition is satisfied, the yes-pattern is used; otherwise the
no-pattern (if present) is used. If there are more than two alternatives in the
subpattern, a compile-time error occurs.
</P>
<P>
There are four kinds of condition: references to subpatterns, references to
recursion, a pseudo-condition called DEFINE, and assertions.
</P>
<br><b>
Checking for a used subpattern by number
</b><br>
<P>
If the text between the parentheses consists of a sequence of digits, the
condition is true if a capturing subpattern of that number has previously
matched. If there is more than one capturing subpattern with the same number
(see the earlier
<a href="#recursion">section about duplicate subpattern numbers),</a>
the condition is true if any of them have been set. An alternative notation is
to precede the digits with a plus or minus sign. In this case, the subpattern
number is relative rather than absolute. The most recently opened parentheses
can be referenced by (?(-1), the next most recent by (?(-2), and so on. In
looping constructs it can also make sense to refer to subsequent groups with
constructs such as (?(+2).
</P>
<P>
Consider the following pattern, which contains non-significant white space to
make it more readable (assume the PCRE_EXTENDED option) and to divide it into
three parts for ease of discussion:
<pre>
  ( \( )?    [^()]+    (?(1) \) )
</pre>
The first part matches an optional opening parenthesis, and if that
character is present, sets it as the first captured substring. The second part
matches one or more characters that are not parentheses. The third part is a
conditional subpattern that tests whether the first set of parentheses matched
or not. If they did, that is, if the subject started with an opening parenthesis,
the condition is true, and so the yes-pattern is executed and a closing
parenthesis is required. Otherwise, since no-pattern is not present, the
subpattern matches nothing. In other words, this pattern matches a sequence of
non-parentheses, optionally enclosed in parentheses.
</P>
<P>
If you were embedding this pattern in a larger one, you could use a relative
reference:
<pre>
  ...other stuff... ( \( )?    [^()]+    (?(-1) \) ) ...
</pre>
This makes the fragment independent of the parentheses in the larger pattern.
</P>
<br><b>
Checking for a used subpattern by name
</b><br>
<P>
Perl uses the syntax (?(&#60;name&#62;)...) or (?('name')...) to test for a used
subpattern by name. For compatibility with earlier versions of PCRE, which had
this facility before Perl, the syntax (?(name)...) is also recognized. However,
there is a possible ambiguity with this syntax, because subpattern names may
consist entirely of digits. PCRE looks first for a named subpattern; if it
cannot find one and the name consists entirely of digits, PCRE looks for a
subpattern of that number, which must be greater than zero. Using subpattern
names that consist entirely of digits is not recommended.
</P>
<P>
Rewriting the above example to use a named subpattern gives this:
<pre>
  (?&#60;OPEN&#62; \( )?    [^()]+    (?(&#60;OPEN&#62;) \) )
</pre>
If the name used in a condition of this kind is a duplicate, the test is
applied to all subpatterns of the same name, and is true if any one of them has
matched.
</P>
<br><b>
Checking for pattern recursion
</b><br>
<P>
If the condition is the string (R), and there is no subpattern with the name R,
the condition is true if a recursive call to the whole pattern or any
subpattern has been made. If digits or a name preceded by ampersand follow the
letter R, for example:
<pre>
  (?(R3)...) or (?(R&name)...)
</pre>
the condition is true if the most recent recursion is into a subpattern whose
number or name is given. This condition does not check the entire recursion
stack. If the name used in a condition of this kind is a duplicate, the test is
applied to all subpatterns of the same name, and is true if any one of them is
the most recent recursion.
</P>
<P>
At "top level", all these recursion test conditions are false.
<a href="#recursion">The syntax for recursive patterns</a>
is described below.
</P>
<br><b>
Defining subpatterns for use by reference only
</b><br>
<P>
If the condition is the string (DEFINE), and there is no subpattern with the
name DEFINE, the condition is always false. In this case, there may be only one
alternative in the subpattern. It is always skipped if control reaches this
point in the pattern; the idea of DEFINE is that it can be used to define
"subroutines" that can be referenced from elsewhere. (The use of
<a href="#subpatternsassubroutines">"subroutines"</a>
is described below.) For example, a pattern to match an IPv4 address could be
written like this (ignore whitespace and line breaks):
<pre>
  (?(DEFINE) (?&#60;byte&#62; 2[0-4]\d | 25[0-5] | 1\d\d | [1-9]?\d) )
  \b (?&byte) (\.(?&byte)){3} \b
</pre>
The first part of the pattern is a DEFINE group inside which another group
named "byte" is defined. This matches an individual component of an IPv4
address (a number less than 256). When matching takes place, this part of the
pattern is skipped because DEFINE acts like a false condition. The rest of the
pattern uses references to the named group to match the four dot-separated
components of an IPv4 address, insisting on a word boundary at each end.
</P>
<br><b>
Assertion conditions
</b><br>
<P>
If the condition is not in any of the above formats, it must be an assertion.
This may be a positive or negative lookahead or lookbehind assertion. Consider
this pattern, again containing non-significant white space, and with the two
alternatives on the second line:
<pre>
  (?(?=[^a-z]*[a-z])
  \d{2}-[a-z]{3}-\d{2}  |  \d{2}-\d{2}-\d{2} )
</pre>
The condition is a positive lookahead assertion that matches an optional
sequence of non-letters followed by a letter. In other words, it tests for the
presence of at least one letter in the subject. If a letter is found, the
subject is matched against the first alternative; otherwise it is matched
against the second. This pattern matches strings in one of the two forms
dd-aaa-dd or dd-dd-dd, where aaa are letters and dd are digits.
<a name="comments"></a></P>
<br><a name="SEC20" href="#TOC1">COMMENTS</a><br>
<P>
The sequence (?# marks the start of a comment that continues up to the next
closing parenthesis. Nested parentheses are not permitted. The characters
that make up a comment play no part in the pattern matching at all.
</P>
<P>
If the PCRE_EXTENDED option is set, an unescaped # character outside a
character class introduces a comment that continues to immediately after the
next newline in the pattern.
<a name="recursion"></a></P>
<br><a name="SEC21" href="#TOC1">RECURSIVE PATTERNS</a><br>
<P>
Consider the problem of matching a string in parentheses, allowing for
unlimited nested parentheses. Without the use of recursion, the best that can
be done is to use a pattern that matches up to some fixed depth of nesting. It
is not possible to handle an arbitrary nesting depth.
</P>
<P>
For some time, Perl has provided a facility that allows regular expressions to
recurse (amongst other things). It does this by interpolating Perl code in the
expression at run time, and the code can refer to the expression itself. A Perl
pattern using code interpolation to solve the parentheses problem can be
created like this:
<pre>
  $re = qr{\( (?: (?&#62;[^()]+) | (?p{$re}) )* \)}x;
</pre>
The (?p{...}) item interpolates Perl code at run time, and in this case refers
recursively to the pattern in which it appears.
</P>
<P>
Obviously, PCRE cannot support the interpolation of Perl code. Instead, it
supports special syntax for recursion of the entire pattern, and also for
individual subpattern recursion. After its introduction in PCRE and Python,
this kind of recursion was subsequently introduced into Perl at release 5.10.
</P>
<P>
A special item that consists of (? followed by a number greater than zero and a
closing parenthesis is a recursive call of the subpattern of the given number,
provided that it occurs inside that subpattern. (If not, it is a
<a href="#subpatternsassubroutines">"subroutine"</a>
call, which is described in the next section.) The special item (?R) or (?0) is
a recursive call of the entire regular expression.
</P>
<P>
This PCRE pattern solves the nested parentheses problem (assume the
PCRE_EXTENDED option is set so that white space is ignored):
<pre>
  \( ( [^()]++ | (?R) )* \)
</pre>
First it matches an opening parenthesis. Then it matches any number of
substrings which can either be a sequence of non-parentheses, or a recursive
match of the pattern itself (that is, a correctly parenthesized substring).
Finally there is a closing parenthesis. Note the use of a possessive quantifier
to avoid backtracking into sequences of non-parentheses.
</P>
<P>
If this were part of a larger pattern, you would not want to recurse the entire
pattern, so instead you could use this:
<pre>
  ( \( ( [^()]++ | (?1) )* \) )
</pre>
We have put the pattern into parentheses, and caused the recursion to refer to
them instead of the whole pattern.
</P>
<P>
In a larger pattern, keeping track of parenthesis numbers can be tricky. This
is made easier by the use of relative references (a Perl 5.10 feature).
Instead of (?1) in the pattern above you can write (?-2) to refer to the second
most recently opened parentheses preceding the recursion. In other words, a
negative number counts capturing parentheses leftwards from the point at which
it is encountered.
</P>
<P>
It is also possible to refer to subsequently opened parentheses, by writing
references such as (?+2). However, these cannot be recursive because the
reference is not inside the parentheses that are referenced. They are always
<a href="#subpatternsassubroutines">"subroutine"</a>
calls, as described in the next section.
</P>
<P>
An alternative approach is to use named parentheses instead. The Perl syntax
for this is (?&name); PCRE's earlier syntax (?P&#62;name) is also supported. We
could rewrite the above example as follows:
<pre>
  (?&#60;pn&#62; \( ( [^()]++ | (?&pn) )* \) )
</pre>
If there is more than one subpattern with the same name, the earliest one is
used.
</P>
<P>
This particular example pattern that we have been looking at contains nested
unlimited repeats, and so the use of a possessive quantifier for matching
strings of non-parentheses is important when applying the pattern to strings
that do not match. For example, when this pattern is applied to
<pre>
  (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()
</pre>
it yields "no match" quickly. However, if a possessive quantifier is not used,
the match runs for a very long time indeed because there are so many different
ways the + and * repeats can carve up the subject, and all have to be tested
before failure can be reported.
</P>
<P>
At the end of a match, the values of capturing parentheses are those from
the outermost level. If you want to obtain intermediate values, a callout
function can be used (see below and the
<a href="pcrecallout.html"><b>pcrecallout</b></a>
documentation). If the pattern above is matched against
<pre>
  (ab(cd)ef)
</pre>
the value for the inner capturing parentheses (numbered 2) is "ef", which is
the last value taken on at the top level. If a capturing subpattern is not
matched at the top level, its final value is unset, even if it is (temporarily)
set at a deeper level.
</P>
<P>
If there are more than 15 capturing parentheses in a pattern, PCRE has to
obtain extra memory to store data during a recursion, which it does by using
<b>pcre_malloc</b>, freeing it via <b>pcre_free</b> afterwards. If no memory can
be obtained, the match fails with the PCRE_ERROR_NOMEMORY error.
</P>
<P>
Do not confuse the (?R) item with the condition (R), which tests for recursion.
Consider this pattern, which matches text in angle brackets, allowing for
arbitrary nesting. Only digits are allowed in nested brackets (that is, when
recursing), whereas any characters are permitted at the outer level.
<pre>
  &#60; (?: (?(R) \d++  | [^&#60;&#62;]*+) | (?R)) * &#62;
</pre>
In this pattern, (?(R) is the start of a conditional subpattern, with two
different alternatives for the recursive and non-recursive cases. The (?R) item
is the actual recursive call.
<a name="recursiondifference"></a></P>
<br><b>
Recursion difference from Perl
</b><br>
<P>
In PCRE (like Python, but unlike Perl), a recursive subpattern call is always
treated as an atomic group. That is, once it has matched some of the subject
string, it is never re-entered, even if it contains untried alternatives and
there is a subsequent matching failure. This can be illustrated by the
following pattern, which purports to match a palindromic string that contains
an odd number of characters (for example, "a", "aba", "abcba", "abcdcba"):
<pre>
  ^(.|(.)(?1)\2)$
</pre>
The idea is that it either matches a single character, or two identical
characters surrounding a sub-palindrome. In Perl, this pattern works; in PCRE
it does not if the subject string is longer than three characters. Consider the
subject string "abcba":
</P>
<P>
At the top level, the first character is matched, but as it is not at the end
of the string, the first alternative fails; the second alternative is taken
and the recursion kicks in. The recursive call to subpattern 1 successfully
matches the next character ("b"). (Note that the beginning and end of line
tests are not part of the recursion).
</P>
<P>
Back at the top level, the next character ("c") is compared with what
subpattern 2 matched, which was "a". This fails. Because the recursion is
treated as an atomic group, there are now no backtracking points, and so the
entire match fails. (Perl is able, at this point, to re-enter the recursion and
try the second alternative.) However, if the pattern is written with the
alternatives in the other order, things are different:
<pre>
  ^((.)(?1)\2|.)$
</pre>
This time, the recursing alternative is tried first, and continues to recurse
until it runs out of characters, at which point the recursion fails. But this
time we do have another alternative to try at the higher level. That is the big
difference: in the previous case the remaining alternative is at a deeper
recursion level, which PCRE cannot use.
</P>
<P>
To change the pattern so that it matches all palindromic strings, not just those
with an odd number of characters, it is tempting to change the pattern to this:
<pre>
  ^((.)(?1)\2|.?)$
</pre>
Again, this works in Perl, but not in PCRE, and for the same reason. When a
deeper recursion has matched a single character, it cannot be entered again in
order to match an empty string. The solution is to separate the two cases, and
write out the odd and even cases as alternatives at the higher level:
<pre>
  ^(?:((.)(?1)\2|)|((.)(?3)\4|.))$
</pre>
If you want to match typical palindromic phrases, the pattern has to ignore all
non-word characters, which can be done like this:
<pre>
  ^\W*+(?:((.)\W*+(?1)\W*+\2|)|((.)\W*+(?3)\W*+\4|\W*+.\W*+))\W*+$
</pre>
If run with the PCRE_CASELESS option, this pattern matches phrases such as "A
man, a plan, a canal: Panama!" and it works well in both PCRE and Perl. Note
the use of the possessive quantifier *+ to avoid backtracking into sequences of
non-word characters. Without this, PCRE takes a great deal longer (ten times or
more) to match typical phrases, and Perl takes so long that you think it has
gone into a loop.
</P>
<P>
<b>WARNING</b>: The palindrome-matching patterns above work only if the subject
string does not start with a palindrome that is shorter than the entire string.
For example, although "abcba" is correctly matched, if the subject is "ababa",
PCRE finds the palindrome "aba" at the start, then fails at top level because
the end of the string does not follow. Once again, it cannot jump back into the
recursion to try other alternatives, so the entire match fails.
<a name="subpatternsassubroutines"></a></P>
<br><a name="SEC22" href="#TOC1">SUBPATTERNS AS SUBROUTINES</a><br>
<P>
If the syntax for a recursive subpattern reference (either by number or by
name) is used outside the parentheses to which it refers, it operates like a
subroutine in a programming language. The "called" subpattern may be defined
before or after the reference. A numbered reference can be absolute or
relative, as in these examples:
<pre>
  (...(absolute)...)...(?2)...
  (...(relative)...)...(?-1)...
  (...(?+1)...(relative)...
</pre>
An earlier example pointed out that the pattern
<pre>
  (sens|respons)e and \1ibility
</pre>
matches "sense and sensibility" and "response and responsibility", but not
"sense and responsibility". If instead the pattern
<pre>
  (sens|respons)e and (?1)ibility
</pre>
is used, it does match "sense and responsibility" as well as the other two
strings. Another example is given in the discussion of DEFINE above.
</P>
<P>
Like recursive subpatterns, a subroutine call is always treated as an atomic
group. That is, once it has matched some of the subject string, it is never
re-entered, even if it contains untried alternatives and there is a subsequent
matching failure. Any capturing parentheses that are set during the subroutine
call revert to their previous values afterwards.
</P>
<P>
When a subpattern is used as a subroutine, processing options such as
case-independence are fixed when the subpattern is defined. They cannot be
changed for different calls. For example, consider this pattern:
<pre>
  (abc)(?i:(?-1))
</pre>
It matches "abcabc". It does not match "abcABC" because the change of
processing option does not affect the called subpattern.
<a name="onigurumasubroutines"></a></P>
<br><a name="SEC23" href="#TOC1">ONIGURUMA SUBROUTINE SYNTAX</a><br>
<P>
For compatibility with Oniguruma, the non-Perl syntax \g followed by a name or
a number enclosed either in angle brackets or single quotes, is an alternative
syntax for referencing a subpattern as a subroutine, possibly recursively. Here
are two of the examples used above, rewritten using this syntax:
<pre>
  (?&#60;pn&#62; \( ( (?&#62;[^()]+) | \g&#60;pn&#62; )* \) )
  (sens|respons)e and \g'1'ibility
</pre>
PCRE supports an extension to Oniguruma: if a number is preceded by a
plus or a minus sign it is taken as a relative reference. For example:
<pre>
  (abc)(?i:\g&#60;-1&#62;)
</pre>
Note that \g{...} (Perl syntax) and \g&#60;...&#62; (Oniguruma syntax) are <i>not</i>
synonymous. The former is a back reference; the latter is a subroutine call.
</P>
<br><a name="SEC24" href="#TOC1">CALLOUTS</a><br>
<P>
Perl has a feature whereby using the sequence (?{...}) causes arbitrary Perl
code to be obeyed in the middle of matching a regular expression. This makes it
possible, amongst other things, to extract different substrings that match the
same pair of parentheses when there is a repetition.
</P>
<P>
PCRE provides a similar feature, but of course it cannot obey arbitrary Perl
code. The feature is called "callout". The caller of PCRE provides an external
function by putting its entry point in the global variable <i>pcre_callout</i>.
By default, this variable contains NULL, which disables all calling out.
</P>
<P>
Within a regular expression, (?C) indicates the points at which the external
function is to be called. If you want to identify different callout points, you
can put a number less than 256 after the letter C. The default value is zero.
For example, this pattern has two callout points:
<pre>
  (?C1)abc(?C2)def
</pre>
If the PCRE_AUTO_CALLOUT flag is passed to <b>pcre_compile()</b>, callouts are
automatically installed before each item in the pattern. They are all numbered
255.
</P>
<P>
During matching, when PCRE reaches a callout point (and <i>pcre_callout</i> is
set), the external function is called. It is provided with the number of the
callout, the position in the pattern, and, optionally, one item of data
originally supplied by the caller of <b>pcre_exec()</b>. The callout function
may cause matching to proceed, to backtrack, or to fail altogether. A complete
description of the interface to the callout function is given in the
<a href="pcrecallout.html"><b>pcrecallout</b></a>
documentation.
</P>
<br><a name="SEC25" href="#TOC1">BACKTRACKING CONTROL</a><br>
<P>
Perl 5.10 introduced a number of "Special Backtracking Control Verbs", which
are described in the Perl documentation as "experimental and subject to change
or removal in a future version of Perl". It goes on to say: "Their usage in
production code should be noted to avoid problems during upgrades." The same
remarks apply to the PCRE features described in this section.
</P>
<P>
Since these verbs are specifically related to backtracking, most of them can be
used only when the pattern is to be matched using <b>pcre_exec()</b>, which uses
a backtracking algorithm. With the exception of (*FAIL), which behaves like a
failing negative assertion, they cause an error if encountered by
<b>pcre_dfa_exec()</b>.
</P>
<P>
If any of these verbs are used in an assertion or subroutine subpattern
(including recursive subpatterns), their effect is confined to that subpattern;
it does not extend to the surrounding pattern. Note that such subpatterns are
processed as anchored at the point where they are tested.
</P>
<P>
The new verbs make use of what was previously invalid syntax: an opening
parenthesis followed by an asterisk. In Perl, they are generally of the form
(*VERB:ARG) but PCRE does not support the use of arguments, so its general
form is just (*VERB). Any number of these verbs may occur in a pattern. There
are two kinds:
</P>
<br><b>
Verbs that act immediately
</b><br>
<P>
The following verbs act as soon as they are encountered:
<pre>
   (*ACCEPT)
</pre>
This verb causes the match to end successfully, skipping the remainder of the
pattern. When inside a recursion, only the innermost pattern is ended
immediately. If (*ACCEPT) is inside capturing parentheses, the data so far is
captured. (This feature was added to PCRE at release 8.00.) For example:
<pre>
  A((?:A|B(*ACCEPT)|C)D)
</pre>
This matches "AB", "AAD", or "ACD"; when it matches "AB", "B" is captured by
the outer parentheses.
<pre>
  (*FAIL) or (*F)
</pre>
This verb causes the match to fail, forcing backtracking to occur. It is
equivalent to (?!) but easier to read. The Perl documentation notes that it is
probably useful only when combined with (?{}) or (??{}). Those are, of course,
Perl features that are not present in PCRE. The nearest equivalent is the
callout feature, as for example in this pattern:
<pre>
  a+(?C)(*FAIL)
</pre>
A match with the string "aaaa" always fails, but the callout is taken before
each backtrack happens (in this example, 10 times).
</P>
<br><b>
Verbs that act after backtracking
</b><br>
<P>
The following verbs do nothing when they are encountered. Matching continues
with what follows, but if there is no subsequent match, a failure is forced.
The verbs differ in exactly what kind of failure occurs.
<pre>
  (*COMMIT)
</pre>
This verb causes the whole match to fail outright if the rest of the pattern
does not match. Even if the pattern is unanchored, no further attempts to find
a match by advancing the starting point take place. Once (*COMMIT) has been
passed, <b>pcre_exec()</b> is committed to finding a match at the current
starting point, or not at all. For example:
<pre>
  a+(*COMMIT)b
</pre>
This matches "xxaab" but not "aacaab". It can be thought of as a kind of
dynamic anchor, or "I've started, so I must finish."
<pre>
  (*PRUNE)
</pre>
This verb causes the match to fail at the current position if the rest of the
pattern does not match. If the pattern is unanchored, the normal "bumpalong"
advance to the next starting character then happens. Backtracking can occur as
usual to the left of (*PRUNE), or when matching to the right of (*PRUNE), but
if there is no match to the right, backtracking cannot cross (*PRUNE).
In simple cases, the use of (*PRUNE) is just an alternative to an atomic
group or possessive quantifier, but there are some uses of (*PRUNE) that cannot
be expressed in any other way.
<pre>
  (*SKIP)
</pre>
This verb is like (*PRUNE), except that if the pattern is unanchored, the
"bumpalong" advance is not to the next character, but to the position in the
subject where (*SKIP) was encountered. (*SKIP) signifies that whatever text
was matched leading up to it cannot be part of a successful match. Consider:
<pre>
  a+(*SKIP)b
</pre>
If the subject is "aaaac...", after the first match attempt fails (starting at
the first character in the string), the starting point skips on to start the
next attempt at "c". Note that a possessive quantifier does not have the same
effect as this example; although it would suppress backtracking during the
first match attempt, the second attempt would start at the second character
instead of skipping on to "c".
<pre>
  (*THEN)
</pre>
This verb causes a skip to the next alternation if the rest of the pattern does
not match. That is, it cancels pending backtracking, but only within the
current alternation. Its name comes from the observation that it can be used
for a pattern-based if-then-else block:
<pre>
  ( COND1 (*THEN) FOO | COND2 (*THEN) BAR | COND3 (*THEN) BAZ ) ...
</pre>
If the COND1 pattern matches, FOO is tried (and possibly further items after
the end of the group if FOO succeeds); on failure the matcher skips to the
second alternative and tries COND2, without backtracking into COND1. If (*THEN)
is used outside of any alternation, it acts exactly like (*PRUNE).
</P>
<br><a name="SEC26" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcreapi</b>(3), <b>pcrecallout</b>(3), <b>pcrematching</b>(3),
<b>pcresyntax</b>(3), <b>pcre</b>(3).
</P>
<br><a name="SEC27" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<br><a name="SEC28" href="#TOC1">REVISION</a><br>
<P>
Last updated: 06 March 2010
<br>
Copyright &copy; 1997-2010 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcreperform.html000064400000016155150403561460016454 0ustar00<html>
<head>
<title>pcreperform specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcreperform man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
PCRE PERFORMANCE
</b><br>
<P>
Two aspects of performance are discussed below: memory usage and processing
time. The way you express your pattern as a regular expression can affect both
of them.
</P>
<br><b>
COMPILED PATTERN MEMORY USAGE
</b><br>
<P>
Patterns are compiled by PCRE into a reasonably efficient byte code, so that
most simple patterns do not use much memory. However, there is one case where
the memory usage of a compiled pattern can be unexpectedly large. If a
parenthesized subpattern has a quantifier with a minimum greater than 1 and/or
a limited maximum, the whole subpattern is repeated in the compiled code. For
example, the pattern
<pre>
  (abc|def){2,4}
</pre>
is compiled as if it were
<pre>
  (abc|def)(abc|def)((abc|def)(abc|def)?)?
</pre>
(Technical aside: It is done this way so that backtrack points within each of
the repetitions can be independently maintained.)
</P>
<P>
For regular expressions whose quantifiers use only small numbers, this is not
usually a problem. However, if the numbers are large, and particularly if such
repetitions are nested, the memory usage can become an embarrassment. For
example, the very simple pattern
<pre>
  ((ab){1,1000}c){1,3}
</pre>
uses 51K bytes when compiled. When PCRE is compiled with its default internal
pointer size of two bytes, the size limit on a compiled pattern is 64K, and
this is reached with the above pattern if the outer repetition is increased
from 3 to 4. PCRE can be compiled to use larger internal pointers and thus
handle larger compiled patterns, but it is better to try to rewrite your
pattern to use less memory if you can.
</P>
<P>
One way of reducing the memory usage for such patterns is to make use of PCRE's
<a href="pcrepattern.html#subpatternsassubroutines">"subroutine"</a>
facility. Re-writing the above pattern as
<pre>
  ((ab)(?2){0,999}c)(?1){0,2}
</pre>
reduces the memory requirements to 18K, and indeed it remains under 20K even
with the outer repetition increased to 100. However, this pattern is not
exactly equivalent, because the "subroutine" calls are treated as
<a href="pcrepattern.html#atomicgroup">atomic groups</a>
into which there can be no backtracking if there is a subsequent matching
failure. Therefore, PCRE cannot do this kind of rewriting automatically.
Furthermore, there is a noticeable loss of speed when executing the modified
pattern. Nevertheless, if the atomic grouping is not a problem and the loss of
speed is acceptable, this kind of rewriting will allow you to process patterns
that PCRE cannot otherwise handle.
</P>
<br><b>
STACK USAGE AT RUN TIME
</b><br>
<P>
When <b>pcre_exec()</b> is used for matching, certain kinds of pattern can cause
it to use large amounts of the process stack. In some environments the default
process stack is quite small, and if it runs out the result is often SIGSEGV.
This issue is probably the most frequently raised problem with PCRE. Rewriting
your pattern can often help. The
<a href="pcrestack.html"><b>pcrestack</b></a>
documentation discusses this issue in detail.
</P>
<br><b>
PROCESSING TIME
</b><br>
<P>
Certain items in regular expression patterns are processed more efficiently
than others. It is more efficient to use a character class like [aeiou] than a
set of single-character alternatives such as (a|e|i|o|u). In general, the
simplest construction that provides the required behaviour is usually the most
efficient. Jeffrey Friedl's book contains a lot of useful general discussion
about optimizing regular expressions for efficient performance. This document
contains a few observations about PCRE.
</P>
<P>
Using Unicode character properties (the \p, \P, and \X escapes) is slow,
because PCRE has to scan a structure that contains data for over fifteen
thousand characters whenever it needs a character's property. If you can find
an alternative pattern that does not use character properties, it will probably
be faster.
</P>
<P>
When a pattern begins with .* not in parentheses, or in parentheses that are
not the subject of a backreference, and the PCRE_DOTALL option is set, the
pattern is implicitly anchored by PCRE, since it can match only at the start of
a subject string. However, if PCRE_DOTALL is not set, PCRE cannot make this
optimization, because the . metacharacter does not then match a newline, and if
the subject string contains newlines, the pattern may match from the character
immediately following one of them instead of from the very start. For example,
the pattern
<pre>
  .*second
</pre>
matches the subject "first\nand second" (where \n stands for a newline
character), with the match starting at the seventh character. In order to do
this, PCRE has to retry the match starting after every newline in the subject.
</P>
<P>
If you are using such a pattern with subject strings that do not contain
newlines, the best performance is obtained by setting PCRE_DOTALL, or starting
the pattern with ^.* or ^.*? to indicate explicit anchoring. That saves PCRE
from having to scan along the subject looking for a newline to restart at.
</P>
<P>
Beware of patterns that contain nested indefinite repeats. These can take a
long time to run when applied to a string that does not match. Consider the
pattern fragment
<pre>
  ^(a+)*
</pre>
This can match "aaaa" in 16 different ways, and this number increases very
rapidly as the string gets longer. (The * repeat can match 0, 1, 2, 3, or 4
times, and for each of those cases other than 0 or 4, the + repeats can match
different numbers of times.) When the remainder of the pattern is such that the
entire match is going to fail, PCRE has in principle to try every possible
variation, and this can take an extremely long time, even for relatively short
strings.
</P>
<P>
An optimization catches some of the more simple cases such as
<pre>
  (a+)*b
</pre>
where a literal character follows. Before embarking on the standard matching
procedure, PCRE checks that there is a "b" later in the subject string, and if
there is not, it fails the match immediately. However, when there is no
following literal this optimization cannot be used. You can see the difference
by comparing the behaviour of
<pre>
  (a+)*\d
</pre>
with the pattern above. The pattern ending in "b" gives a failure almost
instantly when applied to a whole line of "a" characters, whereas the one
ending in \d takes an appreciable time with strings longer than about 20
characters.
</P>
<P>
In many cases, the solution to this kind of performance issue is to use an
atomic group or a possessive quantifier.
</P>
<br><b>
AUTHOR
</b><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<br><b>
REVISION
</b><br>
<P>
Last updated: 07 March 2010
<br>
Copyright &copy; 1997-2010 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre-config.html000064400000005421150403561460016316 0ustar00<html>
<head>
<title>pcre-config specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre-config man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
<li><a name="TOC3" href="#SEC3">OPTIONS</a>
<li><a name="TOC4" href="#SEC4">SEE ALSO</a>
<li><a name="TOC5" href="#SEC5">AUTHOR</a>
<li><a name="TOC6" href="#SEC6">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
<P>
<b>pcre-config  [--prefix] [--exec-prefix] [--version] [--libs]</b>
<b>[--libs-posix] [--cflags] [--cflags-posix]</b>
</P>
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
<P>
<b>pcre-config</b> returns the configuration of the installed PCRE
libraries and the options required to compile a program to use them.
</P>
<br><a name="SEC3" href="#TOC1">OPTIONS</a><br>
<P>
<b>--prefix</b>
Writes the directory prefix used in the PCRE installation for architecture
independent files (<i>/usr</i> on many systems, <i>/usr/local</i> on some
systems) to the standard output.
</P>
<P>
<b>--exec-prefix</b>
Writes the directory prefix used in the PCRE installation for architecture
dependent files (normally the same as <b>--prefix</b>) to the standard output.
</P>
<P>
<b>--version</b>
Writes the version number of the installed PCRE libraries to the standard
output.
</P>
<P>
<b>--libs</b>
Writes to the standard output the command line options required to link
with PCRE (<b>-lpcre</b> on many systems).
</P>
<P>
<b>--libs-posix</b>
Writes to the standard output the command line options required to link with
the PCRE posix emulation library (<b>-lpcreposix</b> <b>-lpcre</b> on many
systems).
</P>
<P>
<b>--cflags</b>
Writes to the standard output the command line options required to compile
files that use PCRE (this may include some <b>-I</b> options, but is blank on
many systems).
</P>
<P>
<b>--cflags-posix</b>
Writes to the standard output the command line options required to compile
files that use the PCRE posix emulation library (this may include some <b>-I</b>
options, but is blank on many systems).
</P>
<br><a name="SEC4" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcre(3)</b>
</P>
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
<P>
This manual page was originally written by Mark Baker for the Debian GNU/Linux
system. It has been slightly revised as a generic PCRE man page.
</P>
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
<P>
Last updated: 18 April 2007
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcreprecompile.html000064400000014675150403561470017147 0ustar00<html>
<head>
<title>pcreprecompile specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcreprecompile man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">SAVING AND RE-USING PRECOMPILED PCRE PATTERNS</a>
<li><a name="TOC2" href="#SEC2">SAVING A COMPILED PATTERN</a>
<li><a name="TOC3" href="#SEC3">RE-USING A PRECOMPILED PATTERN</a>
<li><a name="TOC4" href="#SEC4">COMPATIBILITY WITH DIFFERENT PCRE RELEASES</a>
<li><a name="TOC5" href="#SEC5">AUTHOR</a>
<li><a name="TOC6" href="#SEC6">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">SAVING AND RE-USING PRECOMPILED PCRE PATTERNS</a><br>
<P>
If you are running an application that uses a large number of regular
expression patterns, it may be useful to store them in a precompiled form
instead of having to compile them every time the application is run.
If you are not using any private character tables (see the
<a href="pcre_maketables.html"><b>pcre_maketables()</b></a>
documentation), this is relatively straightforward. If you are using private
tables, it is a little bit more complicated.
</P>
<P>
If you save compiled patterns to a file, you can copy them to a different host
and run them there. This works even if the new host has the opposite endianness
to the one on which the patterns were compiled. There may be a small
performance penalty, but it should be insignificant. However, compiling regular
expressions with one version of PCRE for use with a different version is not
guaranteed to work and may cause crashes.
</P>
<br><a name="SEC2" href="#TOC1">SAVING A COMPILED PATTERN</a><br>
<P>
The value returned by <b>pcre_compile()</b> points to a single block of memory
that holds the compiled pattern and associated data. You can find the length of
this block in bytes by calling <b>pcre_fullinfo()</b> with an argument of
PCRE_INFO_SIZE. You can then save the data in any appropriate manner. Here is
sample code that compiles a pattern and writes it to a file. It assumes that
the variable <i>fd</i> refers to a file that is open for output:
<pre>
  int erroroffset, rc, size;
  char *error;
  pcre *re;

  re = pcre_compile("my pattern", 0, &error, &erroroffset, NULL);
  if (re == NULL) { ... handle errors ... }
  rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size);
  if (rc &#60; 0) { ... handle errors ... }
  rc = fwrite(re, 1, size, fd);
  if (rc != size) { ... handle errors ... }
</pre>
In this example, the bytes that comprise the compiled pattern are copied
exactly. Note that this is binary data that may contain any of the 256 possible
byte values. On systems that make a distinction between binary and non-binary
data, be sure that the file is opened for binary output.
</P>
<P>
If you want to write more than one pattern to a file, you will have to devise a
way of separating them. For binary data, preceding each pattern with its length
is probably the most straightforward approach. Another possibility is to write
out the data in hexadecimal instead of binary, one pattern to a line.
</P>
<P>
Saving compiled patterns in a file is only one possible way of storing them for
later use. They could equally well be saved in a database, or in the memory of
some daemon process that passes them via sockets to the processes that want
them.
</P>
<P>
If the pattern has been studied, it is also possible to save the study data in
a similar way to the compiled pattern itself. When studying generates
additional information, <b>pcre_study()</b> returns a pointer to a
<b>pcre_extra</b> data block. Its format is defined in the
<a href="pcreapi.html#extradata">section on matching a pattern</a>
in the
<a href="pcreapi.html"><b>pcreapi</b></a>
documentation. The <i>study_data</i> field points to the binary study data, and
this is what you must save (not the <b>pcre_extra</b> block itself). The length
of the study data can be obtained by calling <b>pcre_fullinfo()</b> with an
argument of PCRE_INFO_STUDYSIZE. Remember to check that <b>pcre_study()</b> did
return a non-NULL value before trying to save the study data.
</P>
<br><a name="SEC3" href="#TOC1">RE-USING A PRECOMPILED PATTERN</a><br>
<P>
Re-using a precompiled pattern is straightforward. Having reloaded it into main
memory, you pass its pointer to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> in
the usual way. This should work even on another host, and even if that host has
the opposite endianness to the one where the pattern was compiled.
</P>
<P>
However, if you passed a pointer to custom character tables when the pattern
was compiled (the <i>tableptr</i> argument of <b>pcre_compile()</b>), you must
now pass a similar pointer to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>,
because the value saved with the compiled pattern will obviously be nonsense. A
field in a <b>pcre_extra</b> block is used to pass this data, as described in
the
<a href="pcreapi.html#extradata">section on matching a pattern</a>
in the
<a href="pcreapi.html"><b>pcreapi</b></a>
documentation.
</P>
<P>
If you did not provide custom character tables when the pattern was compiled,
the pointer in the compiled pattern is NULL, which causes <b>pcre_exec()</b> to
use PCRE's internal tables. Thus, you do not need to take any special action at
run time in this case.
</P>
<P>
If you saved study data with the compiled pattern, you need to create your own
<b>pcre_extra</b> data block and set the <i>study_data</i> field to point to the
reloaded study data. You must also set the PCRE_EXTRA_STUDY_DATA bit in the
<i>flags</i> field to indicate that study data is present. Then pass the
<b>pcre_extra</b> block to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> in the
usual way.
</P>
<br><a name="SEC4" href="#TOC1">COMPATIBILITY WITH DIFFERENT PCRE RELEASES</a><br>
<P>
In general, it is safest to recompile all saved patterns when you update to a
new PCRE release, though not all updates actually require this. Recompiling is
definitely needed for release 7.2.
</P>
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
<P>
Last updated: 13 June 2007
<br>
Copyright &copy; 1997-2007 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/index.html000064400000013050150403561470015227 0ustar00<html>
<!-- This is a manually maintained file that is the root of the HTML version of
     the PCRE documentation. When the HTML documents are built from the man
     page versions, the entire doc/html directory is emptied, this file is then
     copied into doc/html/index.html, and the remaining files therein are
     created by the 132html script.
-->
<head>
<title>PCRE specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>Perl-compatible Regular Expressions (PCRE)</h1>
<p>
The HTML documentation for PCRE comprises the following pages:
</p>

<table>
<tr><td><a href="pcre.html">pcre</a></td>
    <td>&nbsp;&nbsp;Introductory page</td></tr>

<tr><td><a href="pcre-config.html">pcre-config</a></td>
    <td>&nbsp;&nbsp;Information about the installation configuration</td></tr>

<tr><td><a href="pcreapi.html">pcreapi</a></td>
    <td>&nbsp;&nbsp;PCRE's native API</td></tr>

<tr><td><a href="pcrebuild.html">pcrebuild</a></td>
    <td>&nbsp;&nbsp;Options for building PCRE</td></tr>

<tr><td><a href="pcrecallout.html">pcrecallout</a></td>
    <td>&nbsp;&nbsp;The <i>callout</i> facility</td></tr>

<tr><td><a href="pcrecompat.html">pcrecompat</a></td>
    <td>&nbsp;&nbsp;Compatibility with Perl</td></tr>

<tr><td><a href="pcrecpp.html">pcrecpp</a></td>
    <td>&nbsp;&nbsp;The C++ wrapper for the PCRE library</td></tr>

<tr><td><a href="pcredemo.html">pcredemo</a></td>
    <td>&nbsp;&nbsp;A demonstration C program that uses the PCRE library</td></tr>

<tr><td><a href="pcregrep.html">pcregrep</a></td>
    <td>&nbsp;&nbsp;The <b>pcregrep</b> command</td></tr>

<tr><td><a href="pcrematching.html">pcrematching</a></td>
    <td>&nbsp;&nbsp;Discussion of the two matching algorithms</td></tr>

<tr><td><a href="pcrepartial.html">pcrepartial</a></td>
    <td>&nbsp;&nbsp;Using PCRE for partial matching</td></tr>

<tr><td><a href="pcrepattern.html">pcrepattern</a></td>
    <td>&nbsp;&nbsp;Specification of the regular expressions supported by PCRE</td></tr>

<tr><td><a href="pcreperform.html">pcreperform</a></td>
    <td>&nbsp;&nbsp;Some comments on performance</td></tr>

<tr><td><a href="pcreposix.html">pcreposix</a></td>
    <td>&nbsp;&nbsp;The POSIX API to the PCRE library</td></tr>

<tr><td><a href="pcreprecompile.html">pcreprecompile</a></td>
    <td>&nbsp;&nbsp;How to save and re-use compiled patterns</td></tr>

<tr><td><a href="pcresample.html">pcresample</a></td>
    <td>&nbsp;&nbsp;Discussion of the pcredemo program</td></tr>

<tr><td><a href="pcrestack.html">pcrestack</a></td>
    <td>&nbsp;&nbsp;Discussion of PCRE's stack usage</td></tr>

<tr><td><a href="pcresyntax.html">pcresyntax</a></td>
    <td>&nbsp;&nbsp;Syntax quick-reference summary</td></tr>

<tr><td><a href="pcretest.html">pcretest</a></td>
    <td>&nbsp;&nbsp;The <b>pcretest</b> command for testing PCRE</td></tr>
</table>

<p>
There are also individual pages that summarize the interface for each function
in the library:
</p>

<table>

<tr><td><a href="pcre_compile.html">pcre_compile</a></td>
    <td>&nbsp;&nbsp;Compile a regular expression</td></tr>

<tr><td><a href="pcre_compile2.html">pcre_compile2</a></td>
    <td>&nbsp;&nbsp;Compile a regular expression (alternate interface)</td></tr>

<tr><td><a href="pcre_config.html">pcre_config</a></td>
    <td>&nbsp;&nbsp;Show build-time configuration options</td></tr>

<tr><td><a href="pcre_copy_named_substring.html">pcre_copy_named_substring</a></td>
    <td>&nbsp;&nbsp;Extract named substring into given buffer</td></tr>

<tr><td><a href="pcre_copy_substring.html">pcre_copy_substring</a></td>
    <td>&nbsp;&nbsp;Extract numbered substring into given buffer</td></tr>

<tr><td><a href="pcre_dfa_exec.html">pcre_dfa_exec</a></td>
    <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
    (DFA algorithm; <i>not</i> Perl compatible)</td></tr>

<tr><td><a href="pcre_exec.html">pcre_exec</a></td>
    <td>&nbsp;&nbsp;Match a compiled pattern to a subject string
    (Perl compatible)</td></tr>

<tr><td><a href="pcre_free_substring.html">pcre_free_substring</a></td>
    <td>&nbsp;&nbsp;Free extracted substring</td></tr>

<tr><td><a href="pcre_free_substring_list.html">pcre_free_substring_list</a></td>
    <td>&nbsp;&nbsp;Free list of extracted substrings</td></tr>

<tr><td><a href="pcre_fullinfo.html">pcre_fullinfo</a></td>
    <td>&nbsp;&nbsp;Extract information about a pattern</td></tr>

<tr><td><a href="pcre_get_named_substring.html">pcre_get_named_substring</a></td>
    <td>&nbsp;&nbsp;Extract named substring into new memory</td></tr>

<tr><td><a href="pcre_get_stringnumber.html">pcre_get_stringnumber</a></td>
    <td>&nbsp;&nbsp;Convert captured string name to number</td></tr>

<tr><td><a href="pcre_get_substring.html">pcre_get_substring</a></td>
    <td>&nbsp;&nbsp;Extract numbered substring into new memory</td></tr>

<tr><td><a href="pcre_get_substring_list.html">pcre_get_substring_list</a></td>
    <td>&nbsp;&nbsp;Extract all substrings into new memory</td></tr>

<tr><td><a href="pcre_info.html">pcre_info</a></td>
    <td>&nbsp;&nbsp;Obsolete information extraction function</td></tr>

<tr><td><a href="pcre_maketables.html">pcre_maketables</a></td>
    <td>&nbsp;&nbsp;Build character tables in current locale</td></tr>

<tr><td><a href="pcre_refcount.html">pcre_refcount</a></td>
    <td>&nbsp;&nbsp;Maintain reference count in compiled pattern</td></tr>

<tr><td><a href="pcre_study.html">pcre_study</a></td>
    <td>&nbsp;&nbsp;Study a compiled pattern</td></tr>

<tr><td><a href="pcre_version.html">pcre_version</a></td>
    <td>&nbsp;&nbsp;Return PCRE version and release date</td></tr>
</table>

</html>
usr/share/doc/alt-pcre802-devel/html/pcrecpp.html000064400000033724150403561470015566 0ustar00<html>
<head>
<title>pcrecpp specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcrecpp man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">SYNOPSIS OF C++ WRAPPER</a>
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
<li><a name="TOC3" href="#SEC3">MATCHING INTERFACE</a>
<li><a name="TOC4" href="#SEC4">QUOTING METACHARACTERS</a>
<li><a name="TOC5" href="#SEC5">PARTIAL MATCHES</a>
<li><a name="TOC6" href="#SEC6">UTF-8 AND THE MATCHING INTERFACE</a>
<li><a name="TOC7" href="#SEC7">PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE</a>
<li><a name="TOC8" href="#SEC8">SCANNING TEXT INCREMENTALLY</a>
<li><a name="TOC9" href="#SEC9">PARSING HEX/OCTAL/C-RADIX NUMBERS</a>
<li><a name="TOC10" href="#SEC10">REPLACING PARTS OF STRINGS</a>
<li><a name="TOC11" href="#SEC11">AUTHOR</a>
<li><a name="TOC12" href="#SEC12">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">SYNOPSIS OF C++ WRAPPER</a><br>
<P>
<b>#include &#60;pcrecpp.h&#62;</b>
</P>
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
<P>
The C++ wrapper for PCRE was provided by Google Inc. Some additional
functionality was added by Giuseppe Maxia. This brief man page was constructed
from the notes in the <i>pcrecpp.h</i> file, which should be consulted for
further details.
</P>
<br><a name="SEC3" href="#TOC1">MATCHING INTERFACE</a><br>
<P>
The "FullMatch" operation checks that supplied text matches a supplied pattern
exactly. If pointer arguments are supplied, it copies matched sub-strings that
match sub-patterns into them.
<pre>
  Example: successful match
     pcrecpp::RE re("h.*o");
     re.FullMatch("hello");

  Example: unsuccessful match (requires full match):
     pcrecpp::RE re("e");
     !re.FullMatch("hello");

  Example: creating a temporary RE object:
     pcrecpp::RE("h.*o").FullMatch("hello");
</pre>
You can pass in a "const char*" or a "string" for "text". The examples below
tend to use a const char*. You can, as in the different examples above, store
the RE object explicitly in a variable or use a temporary RE object. The
examples below use one mode or the other arbitrarily. Either could correctly be
used for any of these examples.
</P>
<P>
You must supply extra pointer arguments to extract matched subpieces.
<pre>
  Example: extracts "ruby" into "s" and 1234 into "i"
     int i;
     string s;
     pcrecpp::RE re("(\\w+):(\\d+)");
     re.FullMatch("ruby:1234", &s, &i);

  Example: does not try to extract any extra sub-patterns
     re.FullMatch("ruby:1234", &s);

  Example: does not try to extract into NULL
     re.FullMatch("ruby:1234", NULL, &i);

  Example: integer overflow causes failure
     !re.FullMatch("ruby:1234567891234", NULL, &i);

  Example: fails because there aren't enough sub-patterns:
     !pcrecpp::RE("\\w+:\\d+").FullMatch("ruby:1234", &s);

  Example: fails because string cannot be stored in integer
     !pcrecpp::RE("(.*)").FullMatch("ruby", &i);
</pre>
The provided pointer arguments can be pointers to any scalar numeric
type, or one of:
<pre>
   string        (matched piece is copied to string)
   StringPiece   (StringPiece is mutated to point to matched piece)
   T             (where "bool T::ParseFrom(const char*, int)" exists)
   NULL          (the corresponding matched sub-pattern is not copied)
</pre>
The function returns true iff all of the following conditions are satisfied:
<pre>
  a. "text" matches "pattern" exactly;

  b. The number of matched sub-patterns is &#62;= number of supplied
     pointers;

  c. The "i"th argument has a suitable type for holding the
     string captured as the "i"th sub-pattern. If you pass in
     void * NULL for the "i"th argument, or a non-void * NULL
     of the correct type, or pass fewer arguments than the
     number of sub-patterns, "i"th captured sub-pattern is
     ignored.
</pre>
CAVEAT: An optional sub-pattern that does not exist in the matched
string is assigned the empty string. Therefore, the following will
return false (because the empty string is not a valid number):
<pre>
   int number;
   pcrecpp::RE("[a-z]+(\\d+)?").FullMatch("abc", &number);
</pre>
The matching interface supports at most 16 arguments per call.
If you need more, consider using the more general interface
<b>pcrecpp::RE::DoMatch</b>. See <b>pcrecpp.h</b> for the signature for
<b>DoMatch</b>.
</P>
<P>
NOTE: Do not use <b>no_arg</b>, which is used internally to mark the end of a
list of optional arguments, as a placeholder for missing arguments, as this can
lead to segfaults.
</P>
<br><a name="SEC4" href="#TOC1">QUOTING METACHARACTERS</a><br>
<P>
You can use the "QuoteMeta" operation to insert backslashes before all
potentially meaningful characters in a string. The returned string, used as a
regular expression, will exactly match the original string.
<pre>
  Example:
     string quoted = RE::QuoteMeta(unquoted);
</pre>
Note that it's legal to escape a character even if it has no special meaning in
a regular expression -- so this function does that. (This also makes it
identical to the perl function of the same name; see "perldoc -f quotemeta".)
For example, "1.5-2.0?" becomes "1\.5\-2\.0\?".
</P>
<br><a name="SEC5" href="#TOC1">PARTIAL MATCHES</a><br>
<P>
You can use the "PartialMatch" operation when you want the pattern
to match any substring of the text.
<pre>
  Example: simple search for a string:
     pcrecpp::RE("ell").PartialMatch("hello");

  Example: find first number in a string:
     int number;
     pcrecpp::RE re("(\\d+)");
     re.PartialMatch("x*100 + 20", &number);
     assert(number == 100);
</PRE>
</P>
<br><a name="SEC6" href="#TOC1">UTF-8 AND THE MATCHING INTERFACE</a><br>
<P>
By default, pattern and text are plain text, one byte per character. The UTF8
flag, passed to the constructor, causes both pattern and string to be treated
as UTF-8 text, still a byte stream but potentially multiple bytes per
character. In practice, the text is likelier to be UTF-8 than the pattern, but
the match returned may depend on the UTF8 flag, so always use it when matching
UTF8 text. For example, "." will match one byte normally but with UTF8 set may
match up to three bytes of a multi-byte character.
<pre>
  Example:
     pcrecpp::RE_Options options;
     options.set_utf8();
     pcrecpp::RE re(utf8_pattern, options);
     re.FullMatch(utf8_string);

  Example: using the convenience function UTF8():
     pcrecpp::RE re(utf8_pattern, pcrecpp::UTF8());
     re.FullMatch(utf8_string);
</pre>
NOTE: The UTF8 flag is ignored if pcre was not configured with the
<pre>
      --enable-utf8 flag.
</PRE>
</P>
<br><a name="SEC7" href="#TOC1">PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE</a><br>
<P>
PCRE defines some modifiers to change the behavior of the regular expression
engine. The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle to
pass such modifiers to a RE class. Currently, the following modifiers are
supported:
<pre>
   modifier              description               Perl corresponding

   PCRE_CASELESS         case insensitive match      /i
   PCRE_MULTILINE        multiple lines match        /m
   PCRE_DOTALL           dot matches newlines        /s
   PCRE_DOLLAR_ENDONLY   $ matches only at end       N/A
   PCRE_EXTRA            strict escape parsing       N/A
   PCRE_EXTENDED         ignore whitespaces          /x
   PCRE_UTF8             handles UTF8 chars          built-in
   PCRE_UNGREEDY         reverses * and *?           N/A
   PCRE_NO_AUTO_CAPTURE  disables capturing parens   N/A (*)
</pre>
(*) Both Perl and PCRE allow non capturing parentheses by means of the
"?:" modifier within the pattern itself. e.g. (?:ab|cd) does not
capture, while (ab|cd) does.
</P>
<P>
For a full account on how each modifier works, please check the
PCRE API reference page.
</P>
<P>
For each modifier, there are two member functions whose name is made
out of the modifier in lowercase, without the "PCRE_" prefix. For
instance, PCRE_CASELESS is handled by
<pre>
  bool caseless()
</pre>
which returns true if the modifier is set, and
<pre>
  RE_Options & set_caseless(bool)
</pre>
which sets or unsets the modifier. Moreover, PCRE_EXTRA_MATCH_LIMIT can be
accessed through the <b>set_match_limit()</b> and <b>match_limit()</b> member
functions. Setting <i>match_limit</i> to a non-zero value will limit the
execution of pcre to keep it from doing bad things like blowing the stack or
taking an eternity to return a result. A value of 5000 is good enough to stop
stack blowup in a 2MB thread stack. Setting <i>match_limit</i> to zero disables
match limiting. Alternatively, you can call <b>match_limit_recursion()</b>
which uses PCRE_EXTRA_MATCH_LIMIT_RECURSION to limit how much PCRE
recurses. <b>match_limit()</b> limits the number of matches PCRE does;
<b>match_limit_recursion()</b> limits the depth of internal recursion, and
therefore the amount of stack that is used.
</P>
<P>
Normally, to pass one or more modifiers to a RE class, you declare
a <i>RE_Options</i> object, set the appropriate options, and pass this
object to a RE constructor. Example:
<pre>
   RE_Options opt;
   opt.set_caseless(true);
   if (RE("HELLO", opt).PartialMatch("hello world")) ...
</pre>
RE_Options has two constructors. The default constructor takes no arguments and
creates a set of flags that are off by default. The optional parameter
<i>option_flags</i> is to facilitate transfer of legacy code from C programs.
This lets you do
<pre>
   RE(pattern,
     RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
</pre>
However, new code is better off doing
<pre>
   RE(pattern,
     RE_Options().set_caseless(true).set_multiline(true))
       .PartialMatch(str);
</pre>
If you are going to pass one of the most used modifiers, there are some
convenience functions that return a RE_Options class with the
appropriate modifier already set: <b>CASELESS()</b>, <b>UTF8()</b>,
<b>MULTILINE()</b>, <b>DOTALL()</b>, and <b>EXTENDED()</b>.
</P>
<P>
If you need to set several options at once, and you don't want to go through
the pains of declaring a RE_Options object and setting several options, there
is a parallel method that gives you such ability on the fly. You can concatenate
several <b>set_xxxxx()</b> member functions, since each of them returns a
reference to its class object. For example, to pass PCRE_CASELESS,
PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one statement, you may write:
<pre>
   RE(" ^ xyz \\s+ .* blah$",
     RE_Options()
       .set_caseless(true)
       .set_extended(true)
       .set_multiline(true)).PartialMatch(sometext);

</PRE>
</P>
<br><a name="SEC8" href="#TOC1">SCANNING TEXT INCREMENTALLY</a><br>
<P>
The "Consume" operation may be useful if you want to repeatedly
match regular expressions at the front of a string and skip over
them as they match. This requires use of the "StringPiece" type,
which represents a sub-range of a real string. Like RE, StringPiece
is defined in the pcrecpp namespace.
<pre>
  Example: read lines of the form "var = value" from a string.
     string contents = ...;                 // Fill string somehow
     pcrecpp::StringPiece input(contents);  // Wrap in a StringPiece
</PRE>
</P>
<P>
<pre>
     string var;
     int value;
     pcrecpp::RE re("(\\w+) = (\\d+)\n");
     while (re.Consume(&input, &var, &value)) {
       ...;
     }
</pre>
Each successful call to "Consume" will set "var/value", and also
advance "input" so it points past the matched text.
</P>
<P>
The "FindAndConsume" operation is similar to "Consume" but does not
anchor your match at the beginning of the string. For example, you
could extract all words from a string by repeatedly calling
<pre>
  pcrecpp::RE("(\\w+)").FindAndConsume(&input, &word)
</PRE>
</P>
<br><a name="SEC9" href="#TOC1">PARSING HEX/OCTAL/C-RADIX NUMBERS</a><br>
<P>
By default, if you pass a pointer to a numeric value, the
corresponding text is interpreted as a base-10 number. You can
instead wrap the pointer with a call to one of the operators Hex(),
Octal(), or CRadix() to interpret the text in another base. The
CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
prefixes, but defaults to base-10.
<pre>
  Example:
    int a, b, c, d;
    pcrecpp::RE re("(.*) (.*) (.*) (.*)");
    re.FullMatch("100 40 0100 0x40",
                 pcrecpp::Octal(&a), pcrecpp::Hex(&b),
                 pcrecpp::CRadix(&c), pcrecpp::CRadix(&d));
</pre>
will leave 64 in a, b, c, and d.
</P>
<br><a name="SEC10" href="#TOC1">REPLACING PARTS OF STRINGS</a><br>
<P>
You can replace the first match of "pattern" in "str" with "rewrite".
Within "rewrite", backslash-escaped digits (\1 to \9) can be
used to insert text matching corresponding parenthesized group
from the pattern. \0 in "rewrite" refers to the entire matching
text. For example:
<pre>
  string s = "yabba dabba doo";
  pcrecpp::RE("b+").Replace("d", &s);
</pre>
will leave "s" containing "yada dabba doo". The result is true if the pattern
matches and a replacement occurs, false otherwise.
</P>
<P>
<b>GlobalReplace</b> is like <b>Replace</b> except that it replaces all
occurrences of the pattern in the string with the rewrite. Replacements are
not subject to re-matching. For example:
<pre>
  string s = "yabba dabba doo";
  pcrecpp::RE("b+").GlobalReplace("d", &s);
</pre>
will leave "s" containing "yada dada doo". It returns the number of
replacements made.
</P>
<P>
<b>Extract</b> is like <b>Replace</b>, except that if the pattern matches,
"rewrite" is copied into "out" (an additional argument) with substitutions.
The non-matching portions of "text" are ignored. Returns true iff a match
occurred and the extraction happened successfully; if no match occurs, the
string is left unaffected.
</P>
<br><a name="SEC11" href="#TOC1">AUTHOR</a><br>
<P>
The C++ wrapper was contributed by Google Inc.
<br>
Copyright &copy; 2007 Google Inc.
<br>
</P>
<br><a name="SEC12" href="#TOC1">REVISION</a><br>
<P>
Last updated: 17 March 2009
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre_exec.html000064400000007423150403561470016064 0ustar00<html>
<head>
<title>pcre_exec specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_exec man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function matches a compiled regular expression against a given subject
string, using a matching algorithm that is similar to Perl's. It returns
offsets to captured substrings. Its arguments are:
<pre>
  <i>code</i>         Points to the compiled pattern
  <i>extra</i>        Points to an associated <b>pcre_extra</b> structure,
                 or is NULL
  <i>subject</i>      Points to the subject string
  <i>length</i>       Length of the subject string, in bytes
  <i>startoffset</i>  Offset in bytes in the subject at which to
                 start matching
  <i>options</i>      Option bits
  <i>ovector</i>      Points to a vector of ints for result offsets
  <i>ovecsize</i>     Number of elements in the vector (a multiple of 3)
</pre>
The options are:
<pre>
  PCRE_ANCHORED          Match only at the first position
  PCRE_BSR_ANYCRLF       \R matches only CR, LF, or CRLF
  PCRE_BSR_UNICODE       \R matches all Unicode line endings
  PCRE_NEWLINE_ANY       Recognize any Unicode newline sequence
  PCRE_NEWLINE_ANYCRLF   Recognize CR, LF, &amp; CRLF as newline sequences
  PCRE_NEWLINE_CR        Recognize CR as the only newline sequence
  PCRE_NEWLINE_CRLF      Recognize CRLF as the only newline sequence
  PCRE_NEWLINE_LF        Recognize LF as the only newline sequence
  PCRE_NOTBOL            Subject string is not the beginning of a line
  PCRE_NOTEOL            Subject string is not the end of a line
  PCRE_NOTEMPTY          An empty string is not a valid match
  PCRE_NOTEMPTY_ATSTART  An empty string at the start of the subject
                           is not a valid match
  PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
  PCRE_NO_UTF8_CHECK     Do not check the subject for UTF-8
                           validity (only relevant if PCRE_UTF8
                           was set at compile time)
  PCRE_PARTIAL           ) Return PCRE_ERROR_PARTIAL for a partial
  PCRE_PARTIAL_SOFT      )   match if no full matches are found
  PCRE_PARTIAL_HARD      Return PCRE_ERROR_PARTIAL for a partial match
                           even if there is a full match as well
</pre>
For details of partial matching, see the
<a href="pcrepartial.html"><b>pcrepartial</b></a>
page. A <b>pcre_extra</b> structure contains the following fields:
<pre>
  <i>flags</i>        Bits indicating which fields are set
  <i>study_data</i>   Opaque data from <b>pcre_study()</b>
  <i>match_limit</i>  Limit on internal resource use
  <i>match_limit_recursion</i>  Limit on internal recursion depth
  <i>callout_data</i> Opaque data passed back to callouts
  <i>tables</i>       Points to character tables or is NULL
</pre>
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, and
PCRE_EXTRA_TABLES.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre_get_substring.html000064400000003425150403561470020015 0ustar00<html>
<head>
<title>pcre_get_substring specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_get_substring man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
<b>const char **<i>stringptr</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This is a convenience function for extracting a captured substring. The
arguments are:
<pre>
  <i>subject</i>       Subject that has been successfully matched
  <i>ovector</i>       Offset vector that <b>pcre_exec()</b> used
  <i>stringcount</i>   Value returned by <b>pcre_exec()</b>
  <i>stringnumber</i>  Number of the required substring
  <i>stringptr</i>     Where to put the string pointer
</pre>
The memory in which the substring is placed is obtained by calling
<b>pcre_malloc()</b>. The convenience function <b>pcre_free_substring()</b> can
be used to free it when it is no longer needed. The yield of the function is
the length of the substring, PCRE_ERROR_NOMEMORY if sufficient memory could not
be obtained, or PCRE_ERROR_NOSUBSTRING if the string number is invalid.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre_free_substring_list.html000064400000002142150403561470021205 0ustar00<html>
<head>
<title>pcre_free_substring_list specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_free_substring_list man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>void pcre_free_substring_list(const char **<i>stringptr</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This is a convenience function for freeing the store obtained by a previous
call to <b>pcre_get_substring_list()</b>. Its only argument is a pointer to the
list of string pointers.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcrebuild.html000064400000035716150403561470016106 0ustar00<html>
<head>
<title>pcrebuild specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcrebuild man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">PCRE BUILD-TIME OPTIONS</a>
<li><a name="TOC2" href="#SEC2">C++ SUPPORT</a>
<li><a name="TOC3" href="#SEC3">UTF-8 SUPPORT</a>
<li><a name="TOC4" href="#SEC4">UNICODE CHARACTER PROPERTY SUPPORT</a>
<li><a name="TOC5" href="#SEC5">CODE VALUE OF NEWLINE</a>
<li><a name="TOC6" href="#SEC6">WHAT \R MATCHES</a>
<li><a name="TOC7" href="#SEC7">BUILDING SHARED AND STATIC LIBRARIES</a>
<li><a name="TOC8" href="#SEC8">POSIX MALLOC USAGE</a>
<li><a name="TOC9" href="#SEC9">HANDLING VERY LARGE PATTERNS</a>
<li><a name="TOC10" href="#SEC10">AVOIDING EXCESSIVE STACK USAGE</a>
<li><a name="TOC11" href="#SEC11">LIMITING PCRE RESOURCE USAGE</a>
<li><a name="TOC12" href="#SEC12">CREATING CHARACTER TABLES AT BUILD TIME</a>
<li><a name="TOC13" href="#SEC13">USING EBCDIC CODE</a>
<li><a name="TOC14" href="#SEC14">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a>
<li><a name="TOC15" href="#SEC15">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a>
<li><a name="TOC16" href="#SEC16">SEE ALSO</a>
<li><a name="TOC17" href="#SEC17">AUTHOR</a>
<li><a name="TOC18" href="#SEC18">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">PCRE BUILD-TIME OPTIONS</a><br>
<P>
This document describes the optional features of PCRE that can be selected when
the library is compiled. It assumes use of the <b>configure</b> script, where
the optional features are selected or deselected by providing options to
<b>configure</b> before running the <b>make</b> command. However, the same
options can be selected in both Unix-like and non-Unix-like environments using
the GUI facility of <b>cmake-gui</b> if you are using <b>CMake</b> instead of
<b>configure</b> to build PCRE.
</P>
<P>
There is a lot more information about building PCRE in non-Unix-like
environments in the file called <i>NON_UNIX_USE</i>, which is part of the PCRE
distribution. You should consult this file as well as the <i>README</i> file if
you are building in a non-Unix-like environment.
</P>
<P>
The complete list of options for <b>configure</b> (which includes the standard
ones such as the selection of the installation directory) can be obtained by
running
<pre>
  ./configure --help
</pre>
The following sections include descriptions of options whose names begin with
--enable or --disable. These settings specify changes to the defaults for the
<b>configure</b> command. Because of the way that <b>configure</b> works,
--enable and --disable always come in pairs, so the complementary option always
exists as well, but as it specifies the default, it is not described.
</P>
<br><a name="SEC2" href="#TOC1">C++ SUPPORT</a><br>
<P>
By default, the <b>configure</b> script will search for a C++ compiler and C++
header files. If it finds them, it automatically builds the C++ wrapper library
for PCRE. You can disable this by adding
<pre>
  --disable-cpp
</pre>
to the <b>configure</b> command.
</P>
<br><a name="SEC3" href="#TOC1">UTF-8 SUPPORT</a><br>
<P>
To build PCRE with support for UTF-8 Unicode character strings, add
<pre>
  --enable-utf8
</pre>
to the <b>configure</b> command. Of itself, this does not make PCRE treat
strings as UTF-8. As well as compiling PCRE with this option, you also have
to set the PCRE_UTF8 option when you call the <b>pcre_compile()</b>
or <b>pcre_compile2()</b> functions.
</P>
<P>
If you set --enable-utf8 when compiling in an EBCDIC environment, PCRE expects
its input to be either ASCII or UTF-8 (depending on the runtime option). It is
not possible to support both EBCDIC and UTF-8 codes in the same version of the
library. Consequently, --enable-utf8 and --enable-ebcdic are mutually
exclusive.
</P>
<br><a name="SEC4" href="#TOC1">UNICODE CHARACTER PROPERTY SUPPORT</a><br>
<P>
UTF-8 support allows PCRE to process character values greater than 255 in the
strings that it handles. On its own, however, it does not provide any
facilities for accessing the properties of such characters. If you want to be
able to use the pattern escapes \P, \p, and \X, which refer to Unicode
character properties, you must add
<pre>
  --enable-unicode-properties
</pre>
to the <b>configure</b> command. This implies UTF-8 support, even if you have
not explicitly requested it.
</P>
<P>
Including Unicode property support adds around 30K of tables to the PCRE
library. Only the general category properties such as <i>Lu</i> and <i>Nd</i> are
supported. Details are given in the
<a href="pcrepattern.html"><b>pcrepattern</b></a>
documentation.
</P>
<br><a name="SEC5" href="#TOC1">CODE VALUE OF NEWLINE</a><br>
<P>
By default, PCRE interprets the linefeed (LF) character as indicating the end
of a line. This is the normal newline character on Unix-like systems. You can
compile PCRE to use carriage return (CR) instead, by adding
<pre>
  --enable-newline-is-cr
</pre>
to the <b>configure</b> command. There is also a --enable-newline-is-lf option,
which explicitly specifies linefeed as the newline character.
<br>
<br>
Alternatively, you can specify that line endings are to be indicated by the two
character sequence CRLF. If you want this, add
<pre>
  --enable-newline-is-crlf
</pre>
to the <b>configure</b> command. There is a fourth option, specified by
<pre>
  --enable-newline-is-anycrlf
</pre>
which causes PCRE to recognize any of the three sequences CR, LF, or CRLF as
indicating a line ending. Finally, a fifth option, specified by
<pre>
  --enable-newline-is-any
</pre>
causes PCRE to recognize any Unicode newline sequence.
</P>
<P>
Whatever line ending convention is selected when PCRE is built can be
overridden when the library functions are called. At build time it is
conventional to use the standard for your operating system.
</P>
<br><a name="SEC6" href="#TOC1">WHAT \R MATCHES</a><br>
<P>
By default, the sequence \R in a pattern matches any Unicode newline sequence,
whatever has been selected as the line ending sequence. If you specify
<pre>
  --enable-bsr-anycrlf
</pre>
the default is changed so that \R matches only CR, LF, or CRLF. Whatever is
selected when PCRE is built can be overridden when the library functions are
called.
</P>
<br><a name="SEC7" href="#TOC1">BUILDING SHARED AND STATIC LIBRARIES</a><br>
<P>
The PCRE building process uses <b>libtool</b> to build both shared and static
Unix libraries by default. You can suppress one of these by adding one of
<pre>
  --disable-shared
  --disable-static
</pre>
to the <b>configure</b> command, as required.
</P>
<br><a name="SEC8" href="#TOC1">POSIX MALLOC USAGE</a><br>
<P>
When PCRE is called through the POSIX interface (see the
<a href="pcreposix.html"><b>pcreposix</b></a>
documentation), additional working storage is required for holding the pointers
to capturing substrings, because PCRE requires three integers per substring,
whereas the POSIX interface provides only two. If the number of expected
substrings is small, the wrapper function uses space on the stack, because this
is faster than using <b>malloc()</b> for each call. The default threshold above
which the stack is no longer used is 10; it can be changed by adding a setting
such as
<pre>
  --with-posix-malloc-threshold=20
</pre>
to the <b>configure</b> command.
</P>
<br><a name="SEC9" href="#TOC1">HANDLING VERY LARGE PATTERNS</a><br>
<P>
Within a compiled pattern, offset values are used to point from one part to
another (for example, from an opening parenthesis to an alternation
metacharacter). By default, two-byte values are used for these offsets, leading
to a maximum size for a compiled pattern of around 64K. This is sufficient to
handle all but the most gigantic patterns. Nevertheless, some people do want to
process truly enormous patterns, so it is possible to compile PCRE to use
three-byte or four-byte offsets by adding a setting such as
<pre>
  --with-link-size=3
</pre>
to the <b>configure</b> command. The value given must be 2, 3, or 4. Using
longer offsets slows down the operation of PCRE because it has to load
additional bytes when handling them.
</P>
<br><a name="SEC10" href="#TOC1">AVOIDING EXCESSIVE STACK USAGE</a><br>
<P>
When matching with the <b>pcre_exec()</b> function, PCRE implements backtracking
by making recursive calls to an internal function called <b>match()</b>. In
environments where the size of the stack is limited, this can severely limit
PCRE's operation. (The Unix environment does not usually suffer from this
problem, but it may sometimes be necessary to increase the maximum stack size.
There is a discussion in the
<a href="pcrestack.html"><b>pcrestack</b></a>
documentation.) An alternative approach to recursion that uses memory from the
heap to remember data, instead of using recursive function calls, has been
implemented to work round the problem of limited stack size. If you want to
build a version of PCRE that works this way, add
<pre>
  --disable-stack-for-recursion
</pre>
to the <b>configure</b> command. With this configuration, PCRE will use the
<b>pcre_stack_malloc</b> and <b>pcre_stack_free</b> variables to call memory
management functions. By default these point to <b>malloc()</b> and
<b>free()</b>, but you can replace the pointers so that your own functions are
used instead.
</P>
<P>
Separate functions are provided rather than using <b>pcre_malloc</b> and
<b>pcre_free</b> because the usage is very predictable: the block sizes
requested are always the same, and the blocks are always freed in reverse
order. A calling program might be able to implement optimized functions that
perform better than <b>malloc()</b> and <b>free()</b>. PCRE runs noticeably more
slowly when built in this way. This option affects only the <b>pcre_exec()</b>
function; it is not relevant for <b>pcre_dfa_exec()</b>.
</P>
<br><a name="SEC11" href="#TOC1">LIMITING PCRE RESOURCE USAGE</a><br>
<P>
Internally, PCRE has a function called <b>match()</b>, which it calls repeatedly
(sometimes recursively) when matching a pattern with the <b>pcre_exec()</b>
function. By controlling the maximum number of times this function may be
called during a single matching operation, a limit can be placed on the
resources used by a single call to <b>pcre_exec()</b>. The limit can be changed
at run time, as described in the
<a href="pcreapi.html"><b>pcreapi</b></a>
documentation. The default is 10 million, but this can be changed by adding a
setting such as
<pre>
  --with-match-limit=500000
</pre>
to the <b>configure</b> command. This setting has no effect on the
<b>pcre_dfa_exec()</b> matching function.
</P>
<P>
In some environments it is desirable to limit the depth of recursive calls of
<b>match()</b> more strictly than the total number of calls, in order to
restrict the maximum amount of stack (or heap, if --disable-stack-for-recursion
is specified) that is used. A second limit controls this; it defaults to the
value that is set for --with-match-limit, which imposes no additional
constraints. However, you can set a lower limit by adding, for example,
<pre>
  --with-match-limit-recursion=10000
</pre>
to the <b>configure</b> command. This value can also be overridden at run time.
</P>
<br><a name="SEC12" href="#TOC1">CREATING CHARACTER TABLES AT BUILD TIME</a><br>
<P>
PCRE uses fixed tables for processing characters whose code values are less
than 256. By default, PCRE is built with a set of tables that are distributed
in the file <i>pcre_chartables.c.dist</i>. These tables are for ASCII codes
only. If you add
<pre>
  --enable-rebuild-chartables
</pre>
to the <b>configure</b> command, the distributed tables are no longer used.
Instead, a program called <b>dftables</b> is compiled and run. This outputs the
source for a new set of tables, created in the default locale of your C runtime
system. (This method of replacing the tables does not work if you are cross
compiling, because <b>dftables</b> is run on the local host. If you need to
create alternative tables when cross compiling, you will have to do so "by
hand".)
</P>
<br><a name="SEC13" href="#TOC1">USING EBCDIC CODE</a><br>
<P>
PCRE assumes by default that it will run in an environment where the character
code is ASCII (or Unicode, which is a superset of ASCII). This is the case for
most computer operating systems. PCRE can, however, be compiled to run in an
EBCDIC environment by adding
<pre>
  --enable-ebcdic
</pre>
to the <b>configure</b> command. This setting implies
--enable-rebuild-chartables. You should only use it if you know that you are in
an EBCDIC environment (for example, an IBM mainframe operating system). The
--enable-ebcdic option is incompatible with --enable-utf8.
</P>
<br><a name="SEC14" href="#TOC1">PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT</a><br>
<P>
By default, <b>pcregrep</b> reads all files as plain text. You can build it so
that it recognizes files whose names end in <b>.gz</b> or <b>.bz2</b>, and reads
them with <b>libz</b> or <b>libbz2</b>, respectively, by adding one or both of
<pre>
  --enable-pcregrep-libz
  --enable-pcregrep-libbz2
</pre>
to the <b>configure</b> command. These options naturally require that the
relevant libraries are installed on your system. Configuration will fail if
they are not.
</P>
<br><a name="SEC15" href="#TOC1">PCRETEST OPTION FOR LIBREADLINE SUPPORT</a><br>
<P>
If you add
<pre>
  --enable-pcretest-libreadline
</pre>
to the <b>configure</b> command, <b>pcretest</b> is linked with the
<b>libreadline</b> library, and when its input is from a terminal, it reads it
using the <b>readline()</b> function. This provides line-editing and history
facilities. Note that <b>libreadline</b> is GPL-licensed, so if you distribute a
binary of <b>pcretest</b> linked in this way, there may be licensing issues.
</P>
<P>
Setting this option causes the <b>-lreadline</b> option to be added to the
<b>pcretest</b> build. In many operating environments with a system-installed
<b>libreadline</b> this is sufficient. However, in some environments (e.g.
if an unmodified distribution version of readline is in use), some extra
configuration may be necessary. The INSTALL file for <b>libreadline</b> says
this:
<pre>
  "Readline uses the termcap functions, but does not link with the
  termcap or curses library itself, allowing applications which link
  with readline the to choose an appropriate library."
</pre>
If your environment has not been set up so that an appropriate library is
automatically included, you may need to add something like
<pre>
  LIBS="-lncurses"
</pre>
immediately before the <b>configure</b> command.
</P>
<br><a name="SEC16" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcreapi</b>(3), <b>pcre_config</b>(3).
</P>
<br><a name="SEC17" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<br><a name="SEC18" href="#TOC1">REVISION</a><br>
<P>
Last updated: 29 September 2009
<br>
Copyright &copy; 1997-2009 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre_compile.html000064400000006712150403561470016570 0ustar00<html>
<head>
<title>pcre_compile specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_compile man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function compiles a regular expression into an internal form. It is the
same as <b>pcre_compile2()</b>, except for the absence of the <i>errorcodeptr</i>
argument. Its arguments are:
<pre>
  <i>pattern</i>       A zero-terminated string containing the
                  regular expression to be compiled
  <i>options</i>       Zero or more option bits
  <i>errptr</i>        Where to put an error message
  <i>erroffset</i>     Offset in pattern where error was found
  <i>tableptr</i>      Pointer to character tables, or NULL to
                  use the built-in default
</pre>
The option bits are:
<pre>
  PCRE_ANCHORED           Force pattern anchoring
  PCRE_AUTO_CALLOUT       Compile automatic callouts
  PCRE_BSR_ANYCRLF        \R matches only CR, LF, or CRLF
  PCRE_BSR_UNICODE        \R matches all Unicode line endings
  PCRE_CASELESS           Do caseless matching
  PCRE_DOLLAR_ENDONLY     $ not to match newline at end
  PCRE_DOTALL             . matches anything including NL
  PCRE_DUPNAMES           Allow duplicate names for subpatterns
  PCRE_EXTENDED           Ignore whitespace and # comments
  PCRE_EXTRA              PCRE extra features
                            (not much use currently)
  PCRE_FIRSTLINE          Force matching to be before newline
  PCRE_JAVASCRIPT_COMPAT  JavaScript compatibility
  PCRE_MULTILINE          ^ and $ match newlines within data
  PCRE_NEWLINE_ANY        Recognize any Unicode newline sequence
  PCRE_NEWLINE_ANYCRLF    Recognize CR, LF, and CRLF as newline
                            sequences
  PCRE_NEWLINE_CR         Set CR as the newline sequence
  PCRE_NEWLINE_CRLF       Set CRLF as the newline sequence
  PCRE_NEWLINE_LF         Set LF as the newline sequence
  PCRE_NO_AUTO_CAPTURE    Disable numbered capturing paren-
                            theses (named ones available)
  PCRE_NO_UTF8_CHECK      Do not check the pattern for UTF-8
                            validity (only relevant if
                            PCRE_UTF8 is set)
  PCRE_UNGREEDY           Invert greediness of quantifiers
  PCRE_UTF8               Run in UTF-8 mode
</pre>
PCRE must be built with UTF-8 support in order to use PCRE_UTF8 and
PCRE_NO_UTF8_CHECK.
</P>
<P>
The yield of the function is a pointer to a private data structure that
contains the compiled pattern, or NULL if an error was detected. Note that
compiling regular expressions with one version of PCRE for use with a different
version is not guaranteed to work and may cause crashes.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcregrep.html000064400000063547150403561470015747 0ustar00<html>
<head>
<title>pcregrep specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcregrep man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
<li><a name="TOC3" href="#SEC3">SUPPORT FOR COMPRESSED FILES</a>
<li><a name="TOC4" href="#SEC4">OPTIONS</a>
<li><a name="TOC5" href="#SEC5">ENVIRONMENT VARIABLES</a>
<li><a name="TOC6" href="#SEC6">NEWLINES</a>
<li><a name="TOC7" href="#SEC7">OPTIONS COMPATIBILITY</a>
<li><a name="TOC8" href="#SEC8">OPTIONS WITH DATA</a>
<li><a name="TOC9" href="#SEC9">MATCHING ERRORS</a>
<li><a name="TOC10" href="#SEC10">DIAGNOSTICS</a>
<li><a name="TOC11" href="#SEC11">SEE ALSO</a>
<li><a name="TOC12" href="#SEC12">AUTHOR</a>
<li><a name="TOC13" href="#SEC13">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
<P>
<b>pcregrep [options] [long options] [pattern] [path1 path2 ...]</b>
</P>
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
<P>
<b>pcregrep</b> searches files for character patterns, in the same way as other
grep commands do, but it uses the PCRE regular expression library to support
patterns that are compatible with the regular expressions of Perl 5. See
<a href="pcrepattern.html"><b>pcrepattern</b>(3)</a>
for a full description of syntax and semantics of the regular expressions
that PCRE supports.
</P>
<P>
Patterns, whether supplied on the command line or in a separate file, are given
without delimiters. For example:
<pre>
  pcregrep Thursday /etc/motd
</pre>
If you attempt to use delimiters (for example, by surrounding a pattern with
slashes, as is common in Perl scripts), they are interpreted as part of the
pattern. Quotes can of course be used to delimit patterns on the command line
because they are interpreted by the shell, and indeed they are required if a
pattern contains white space or shell metacharacters.
</P>
<P>
The first argument that follows any option settings is treated as the single
pattern to be matched when neither <b>-e</b> nor <b>-f</b> is present.
Conversely, when one or both of these options are used to specify patterns, all
arguments are treated as path names. At least one of <b>-e</b>, <b>-f</b>, or an
argument pattern must be provided.
</P>
<P>
If no files are specified, <b>pcregrep</b> reads the standard input. The
standard input can also be referenced by a name consisting of a single hyphen.
For example:
<pre>
  pcregrep some-pattern /file1 - /file3
</pre>
By default, each line that matches a pattern is copied to the standard
output, and if there is more than one file, the file name is output at the
start of each line, followed by a colon. However, there are options that can
change how <b>pcregrep</b> behaves. In particular, the <b>-M</b> option makes it
possible to search for patterns that span line boundaries. What defines a line
boundary is controlled by the <b>-N</b> (<b>--newline</b>) option.
</P>
<P>
Patterns are limited to 8K or BUFSIZ characters, whichever is the greater.
BUFSIZ is defined in <b>&#60;stdio.h&#62;</b>. When there is more than one pattern
(specified by the use of <b>-e</b> and/or <b>-f</b>), each pattern is applied to
each line in the order in which they are defined, except that all the <b>-e</b>
patterns are tried before the <b>-f</b> patterns.
</P>
<P>
By default, as soon as one pattern matches (or fails to match when <b>-v</b> is
used), no further patterns are considered. However, if <b>--colour</b> (or
<b>--color</b>) is used to colour the matching substrings, or if
<b>--only-matching</b>, <b>--file-offsets</b>, or <b>--line-offsets</b> is used to
output only the part of the line that matched (either shown literally, or as an
offset), scanning resumes immediately following the match, so that further
matches on the same line can be found. If there are multiple patterns, they are
all tried on the remainder of the line, but patterns that follow the one that
matched are not tried on the earlier part of the line.
</P>
<P>
This is the same behaviour as GNU grep, but it does mean that the order in
which multiple patterns are specified can affect the output when one of the
above options is used.
</P>
<P>
Patterns that can match an empty string are accepted, but empty string
matches are never recognized. An example is the pattern "(super)?(man)?", in
which all components are optional. This pattern finds all occurrences of both
"super" and "man"; the output differs from matching with "super|man" when only
the matching substrings are being shown.
</P>
<P>
If the <b>LC_ALL</b> or <b>LC_CTYPE</b> environment variable is set,
<b>pcregrep</b> uses the value to set a locale when calling the PCRE library.
The <b>--locale</b> option can be used to override this.
</P>
<br><a name="SEC3" href="#TOC1">SUPPORT FOR COMPRESSED FILES</a><br>
<P>
It is possible to compile <b>pcregrep</b> so that it uses <b>libz</b> or
<b>libbz2</b> to read files whose names end in <b>.gz</b> or <b>.bz2</b>,
respectively. You can find out whether your binary has support for one or both
of these file types by running it with the <b>--help</b> option. If the
appropriate support is not present, files are treated as plain text. The
standard input is always so treated.
</P>
<br><a name="SEC4" href="#TOC1">OPTIONS</a><br>
<P>
The order in which some of the options appear can affect the output. For
example, both the <b>-h</b> and <b>-l</b> options affect the printing of file
names. Whichever comes later in the command line will be the one that takes
effect.
</P>
<P>
<b>--</b>
This terminates the list of options. It is useful if the next item on the
command line starts with a hyphen but is not an option. This allows for the
processing of patterns and filenames that start with hyphens.
</P>
<P>
<b>-A</b> <i>number</i>, <b>--after-context=</b><i>number</i>
Output <i>number</i> lines of context after each matching line. If filenames
and/or line numbers are being output, a hyphen separator is used instead of a
colon for the context lines. A line containing "--" is output between each
group of lines, unless they are in fact contiguous in the input file. The value
of <i>number</i> is expected to be relatively small. However, <b>pcregrep</b>
guarantees to have up to 8K of following text available for context output.
</P>
<P>
<b>-B</b> <i>number</i>, <b>--before-context=</b><i>number</i>
Output <i>number</i> lines of context before each matching line. If filenames
and/or line numbers are being output, a hyphen separator is used instead of a
colon for the context lines. A line containing "--" is output between each
group of lines, unless they are in fact contiguous in the input file. The value
of <i>number</i> is expected to be relatively small. However, <b>pcregrep</b>
guarantees to have up to 8K of preceding text available for context output.
</P>
<P>
<b>-C</b> <i>number</i>, <b>--context=</b><i>number</i>
Output <i>number</i> lines of context both before and after each matching line.
This is equivalent to setting both <b>-A</b> and <b>-B</b> to the same value.
</P>
<P>
<b>-c</b>, <b>--count</b>
Do not output individual lines from the files that are being scanned; instead
output the number of lines that would otherwise have been shown. If no lines
are selected, the number zero is output. If several files are being
scanned, a count is output for each of them. However, if the
<b>--files-with-matches</b> option is also used, only those files whose counts
are greater than zero are listed. When <b>-c</b> is used, the <b>-A</b>,
<b>-B</b>, and <b>-C</b> options are ignored.
</P>
<P>
<b>--colour</b>, <b>--color</b>
If this option is given without any data, it is equivalent to "--colour=auto".
If data is required, it must be given in the same shell item, separated by an
equals sign.
</P>
<P>
<b>--colour=</b><i>value</i>, <b>--color=</b><i>value</i>
This option specifies under what circumstances the parts of a line that matched
a pattern should be coloured in the output. By default, the output is not
coloured. The value (which is optional, see above) may be "never", "always", or
"auto". In the latter case, colouring happens only if the standard output is
connected to a terminal. More resources are used when colouring is enabled,
because <b>pcregrep</b> has to search for all possible matches in a line, not
just one, in order to colour them all.
</P>
<P>
The colour that is used can be specified by setting the environment variable
PCREGREP_COLOUR or PCREGREP_COLOR. The value of this variable should be a
string of two numbers, separated by a semicolon. They are copied directly into
the control string for setting colour on a terminal, so it is your
responsibility to ensure that they make sense. If neither of the environment
variables is set, the default is "1;31", which gives red.
</P>
<P>
<b>-D</b> <i>action</i>, <b>--devices=</b><i>action</i>
If an input path is not a regular file or a directory, "action" specifies how
it is to be processed. Valid values are "read" (the default) or "skip"
(silently skip the path).
</P>
<P>
<b>-d</b> <i>action</i>, <b>--directories=</b><i>action</i>
If an input path is a directory, "action" specifies how it is to be processed.
Valid values are "read" (the default), "recurse" (equivalent to the <b>-r</b>
option), or "skip" (silently skip the path). In the default case, directories
are read as if they were ordinary files. In some operating systems the effect
of reading a directory like this is an immediate end-of-file.
</P>
<P>
<b>-e</b> <i>pattern</i>, <b>--regex=</b><i>pattern</i>, <b>--regexp=</b><i>pattern</i>
Specify a pattern to be matched. This option can be used multiple times in
order to specify several patterns. It can also be used as a way of specifying a
single pattern that starts with a hyphen. When <b>-e</b> is used, no argument
pattern is taken from the command line; all arguments are treated as file
names. There is an overall maximum of 100 patterns. They are applied to each
line in the order in which they are defined until one matches (or fails to
match if <b>-v</b> is used). If <b>-f</b> is used with <b>-e</b>, the command line
patterns are matched first, followed by the patterns from the file, independent
of the order in which these options are specified. Note that multiple use of
<b>-e</b> is not the same as a single pattern with alternatives. For example,
X|Y finds the first character in a line that is X or Y, whereas if the two
patterns are given separately, <b>pcregrep</b> finds X if it is present, even if
it follows Y in the line. It finds Y only if there is no X in the line. This
really matters only if you are using <b>-o</b> to show the part(s) of the line
that matched.
</P>
<P>
<b>--exclude</b>=<i>pattern</i>
When <b>pcregrep</b> is searching the files in a directory as a consequence of
the <b>-r</b> (recursive search) option, any regular files whose names match the
pattern are excluded. Subdirectories are not excluded by this option; they are
searched recursively, subject to the <b>--exclude_dir</b> and
<b>--include_dir</b> options. The pattern is a PCRE regular expression, and is
matched against the final component of the file name (not the entire path). If
a file name matches both <b>--include</b> and <b>--exclude</b>, it is excluded.
There is no short form for this option.
</P>
<P>
<b>--exclude_dir</b>=<i>pattern</i>
When <b>pcregrep</b> is searching the contents of a directory as a consequence
of the <b>-r</b> (recursive search) option, any subdirectories whose names match
the pattern are excluded. (Note that the <b>--exclude</b> option does not affect
subdirectories.) The pattern is a PCRE regular expression, and is matched
against the final component of the name (not the entire path). If a
subdirectory name matches both <b>--include_dir</b> and <b>--exclude_dir</b>, it
is excluded. There is no short form for this option.
</P>
<P>
<b>-F</b>, <b>--fixed-strings</b>
Interpret each pattern as a list of fixed strings, separated by newlines,
instead of as a regular expression. The <b>-w</b> (match as a word) and <b>-x</b>
(match whole line) options can be used with <b>-F</b>. They apply to each of the
fixed strings. A line is selected if any of the fixed strings are found in it
(subject to <b>-w</b> or <b>-x</b>, if present).
</P>
<P>
<b>-f</b> <i>filename</i>, <b>--file=</b><i>filename</i>
Read a number of patterns from the file, one per line, and match them against
each line of input. A data line is output if any of the patterns match it. The
filename can be given as "-" to refer to the standard input. When <b>-f</b> is
used, patterns specified on the command line using <b>-e</b> may also be
present; they are tested before the file's patterns. However, no other pattern
is taken from the command line; all arguments are treated as file names. There
is an overall maximum of 100 patterns. Trailing white space is removed from
each line, and blank lines are ignored. An empty file contains no patterns and
therefore matches nothing. See also the comments about multiple patterns versus
a single pattern with alternatives in the description of <b>-e</b> above.
</P>
<P>
<b>--file-offsets</b>
Instead of showing lines or parts of lines that match, show each match as an
offset from the start of the file and a length, separated by a comma. In this
mode, no context is shown. That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b>
options are ignored. If there is more than one match in a line, each of them is
shown separately. This option is mutually exclusive with <b>--line-offsets</b>
and <b>--only-matching</b>.
</P>
<P>
<b>-H</b>, <b>--with-filename</b>
Force the inclusion of the filename at the start of output lines when searching
a single file. By default, the filename is not shown in this case. For matching
lines, the filename is followed by a colon; for context lines, a hyphen
separator is used. If a line number is also being output, it follows the file
name.
</P>
<P>
<b>-h</b>, <b>--no-filename</b>
Suppress the output filenames when searching multiple files. By default,
filenames are shown when multiple files are searched. For matching lines, the
filename is followed by a colon; for context lines, a hyphen separator is used.
If a line number is also being output, it follows the file name.
</P>
<P>
<b>--help</b>
Output a help message, giving brief details of the command options and file
type support, and then exit.
</P>
<P>
<b>-i</b>, <b>--ignore-case</b>
Ignore upper/lower case distinctions during comparisons.
</P>
<P>
<b>--include</b>=<i>pattern</i>
When <b>pcregrep</b> is searching the files in a directory as a consequence of
the <b>-r</b> (recursive search) option, only those regular files whose names
match the pattern are included. Subdirectories are always included and searched
recursively, subject to the <b>--include_dir</b> and <b>--exclude_dir</b>
options. The pattern is a PCRE regular expression, and is matched against the
final component of the file name (not the entire path). If a file name matches
both <b>--include</b> and <b>--exclude</b>, it is excluded. There is no short
form for this option.
</P>
<P>
<b>--include_dir</b>=<i>pattern</i>
When <b>pcregrep</b> is searching the contents of a directory as a consequence
of the <b>-r</b> (recursive search) option, only those subdirectories whose
names match the pattern are included. (Note that the <b>--include</b> option
does not affect subdirectories.) The pattern is a PCRE regular expression, and
is matched against the final component of the name (not the entire path). If a
subdirectory name matches both <b>--include_dir</b> and <b>--exclude_dir</b>, it
is excluded. There is no short form for this option.
</P>
<P>
<b>-L</b>, <b>--files-without-match</b>
Instead of outputting lines from the files, just output the names of the files
that do not contain any lines that would have been output. Each file name is
output once, on a separate line.
</P>
<P>
<b>-l</b>, <b>--files-with-matches</b>
Instead of outputting lines from the files, just output the names of the files
containing lines that would have been output. Each file name is output
once, on a separate line. Searching normally stops as soon as a matching line
is found in a file. However, if the <b>-c</b> (count) option is also used,
matching continues in order to obtain the correct count, and those files that
have at least one match are listed along with their counts. Using this option
with <b>-c</b> is a way of suppressing the listing of files with no matches.
</P>
<P>
<b>--label</b>=<i>name</i>
This option supplies a name to be used for the standard input when file names
are being output. If not supplied, "(standard input)" is used. There is no
short form for this option.
</P>
<P>
<b>--line-offsets</b>
Instead of showing lines or parts of lines that match, show each match as a
line number, the offset from the start of the line, and a length. The line
number is terminated by a colon (as usual; see the <b>-n</b> option), and the
offset and length are separated by a comma. In this mode, no context is shown.
That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b> options are ignored. If there is
more than one match in a line, each of them is shown separately. This option is
mutually exclusive with <b>--file-offsets</b> and <b>--only-matching</b>.
</P>
<P>
<b>--locale</b>=<i>locale-name</i>
This option specifies a locale to be used for pattern matching. It overrides
the value in the <b>LC_ALL</b> or <b>LC_CTYPE</b> environment variables. If no
locale is specified, the PCRE library's default (usually the "C" locale) is
used. There is no short form for this option.
</P>
<P>
<b>-M</b>, <b>--multiline</b>
Allow patterns to match more than one line. When this option is given, patterns
may usefully contain literal newline characters and internal occurrences of ^
and $ characters. The output for any one match may consist of more than one
line. When this option is set, the PCRE library is called in "multiline" mode.
There is a limit to the number of lines that can be matched, imposed by the way
that <b>pcregrep</b> buffers the input file as it scans it. However,
<b>pcregrep</b> ensures that at least 8K characters or the rest of the document
(whichever is the shorter) are available for forward matching, and similarly
the previous 8K characters (or all the previous characters, if fewer than 8K)
are guaranteed to be available for lookbehind assertions.
</P>
<P>
<b>-N</b> <i>newline-type</i>, <b>--newline=</b><i>newline-type</i>
The PCRE library supports five different conventions for indicating
the ends of lines. They are the single-character sequences CR (carriage return)
and LF (linefeed), the two-character sequence CRLF, an "anycrlf" convention,
which recognizes any of the preceding three types, and an "any" convention, in
which any Unicode line ending sequence is assumed to end a line. The Unicode
sequences are the three just mentioned, plus VT (vertical tab, U+000B), FF
(formfeed, U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and
PS (paragraph separator, U+2029).
<br>
<br>
When the PCRE library is built, a default line-ending sequence is specified.
This is normally the standard sequence for the operating system. Unless
otherwise specified by this option, <b>pcregrep</b> uses the library's default.
The possible values for this option are CR, LF, CRLF, ANYCRLF, or ANY. This
makes it possible to use <b>pcregrep</b> on files that have come from other
environments without having to modify their line endings. If the data that is
being scanned does not agree with the convention set by this option,
<b>pcregrep</b> may behave in strange ways.
</P>
<P>
<b>-n</b>, <b>--line-number</b>
Precede each output line by its line number in the file, followed by a colon
for matching lines or a hyphen for context lines. If the filename is also being
output, it precedes the line number. This option is forced if
<b>--line-offsets</b> is used.
</P>
<P>
<b>-o</b>, <b>--only-matching</b>
Show only the part of the line that matched a pattern. In this mode, no
context is shown. That is, the <b>-A</b>, <b>-B</b>, and <b>-C</b> options are
ignored. If there is more than one match in a line, each of them is shown
separately. If <b>-o</b> is combined with <b>-v</b> (invert the sense of the
match to find non-matching lines), no output is generated, but the return code
is set appropriately. This option is mutually exclusive with
<b>--file-offsets</b> and <b>--line-offsets</b>.
</P>
<P>
<b>-q</b>, <b>--quiet</b>
Work quietly, that is, display nothing except error messages. The exit
status indicates whether or not any matches were found.
</P>
<P>
<b>-r</b>, <b>--recursive</b>
If any given path is a directory, recursively scan the files it contains,
taking note of any <b>--include</b> and <b>--exclude</b> settings. By default, a
directory is read as a normal file; in some operating systems this gives an
immediate end-of-file. This option is a shorthand for setting the <b>-d</b>
option to "recurse".
</P>
<P>
<b>-s</b>, <b>--no-messages</b>
Suppress error messages about non-existent or unreadable files. Such files are
quietly skipped. However, the return code is still 2, even if matches were
found in other files.
</P>
<P>
<b>-u</b>, <b>--utf-8</b>
Operate in UTF-8 mode. This option is available only if PCRE has been compiled
with UTF-8 support. Both patterns and subject lines must be valid strings of
UTF-8 characters.
</P>
<P>
<b>-V</b>, <b>--version</b>
Write the version numbers of <b>pcregrep</b> and the PCRE library that is being
used to the standard error stream.
</P>
<P>
<b>-v</b>, <b>--invert-match</b>
Invert the sense of the match, so that lines which do <i>not</i> match any of
the patterns are the ones that are found.
</P>
<P>
<b>-w</b>, <b>--word-regex</b>, <b>--word-regexp</b>
Force the patterns to match only whole words. This is equivalent to having \b
at the start and end of the pattern.
</P>
<P>
<b>-x</b>, <b>--line-regex</b>, <b>--line-regexp</b>
Force the patterns to be anchored (each must start matching at the beginning of
a line) and in addition, require them to match entire lines. This is
equivalent to having ^ and $ characters at the start and end of each
alternative branch in every pattern.
</P>
<br><a name="SEC5" href="#TOC1">ENVIRONMENT VARIABLES</a><br>
<P>
The environment variables <b>LC_ALL</b> and <b>LC_CTYPE</b> are examined, in that
order, for a locale. The first one that is set is used. This can be overridden
by the <b>--locale</b> option. If no locale is set, the PCRE library's default
(usually the "C" locale) is used.
</P>
<br><a name="SEC6" href="#TOC1">NEWLINES</a><br>
<P>
The <b>-N</b> (<b>--newline</b>) option allows <b>pcregrep</b> to scan files with
different newline conventions from the default. However, the setting of this
option does not affect the way in which <b>pcregrep</b> writes information to
the standard error and output streams. It uses the string "\n" in C
<b>printf()</b> calls to indicate newlines, relying on the C I/O library to
convert this to an appropriate sequence if the output is sent to a file.
</P>
<br><a name="SEC7" href="#TOC1">OPTIONS COMPATIBILITY</a><br>
<P>
The majority of short and long forms of <b>pcregrep</b>'s options are the same
as in the GNU <b>grep</b> program. Any long option of the form
<b>--xxx-regexp</b> (GNU terminology) is also available as <b>--xxx-regex</b>
(PCRE terminology). However, the <b>--locale</b>, <b>-M</b>, <b>--multiline</b>,
<b>-u</b>, and <b>--utf-8</b> options are specific to <b>pcregrep</b>. If both the
<b>-c</b> and <b>-l</b> options are given, GNU grep lists only file names,
without counts, but <b>pcregrep</b> gives the counts.
</P>
<br><a name="SEC8" href="#TOC1">OPTIONS WITH DATA</a><br>
<P>
There are four different ways in which an option with data can be specified.
If a short form option is used, the data may follow immediately, or in the next
command line item. For example:
<pre>
  -f/some/file
  -f /some/file
</pre>
If a long form option is used, the data may appear in the same command line
item, separated by an equals character, or (with one exception) it may appear
in the next command line item. For example:
<pre>
  --file=/some/file
  --file /some/file
</pre>
Note, however, that if you want to supply a file name beginning with ~ as data
in a shell command, and have the shell expand ~ to a home directory, you must
separate the file name from the option, because the shell does not treat ~
specially unless it is at the start of an item.
</P>
<P>
The exception to the above is the <b>--colour</b> (or <b>--color</b>) option,
for which the data is optional. If this option does have data, it must be given
in the first form, using an equals character. Otherwise it will be assumed that
it has no data.
</P>
<br><a name="SEC9" href="#TOC1">MATCHING ERRORS</a><br>
<P>
It is possible to supply a regular expression that takes a very long time to
fail to match certain lines. Such patterns normally involve nested indefinite
repeats, for example: (a+)*\d when matched against a line of a's with no final
digit. The PCRE matching function has a resource limit that causes it to abort
in these circumstances. If this happens, <b>pcregrep</b> outputs an error
message and the line that caused the problem to the standard error stream. If
there are more than 20 such errors, <b>pcregrep</b> gives up.
</P>
<br><a name="SEC10" href="#TOC1">DIAGNOSTICS</a><br>
<P>
Exit status is 0 if any matches were found, 1 if no matches were found, and 2
for syntax errors and non-existent or inaccessible files (even if matches were
found in other files) or too many matching errors. Using the <b>-s</b> option to
suppress error messages about inaccessible files does not affect the return
code.
</P>
<br><a name="SEC11" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcrepattern</b>(3), <b>pcretest</b>(1).
</P>
<br><a name="SEC12" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<br><a name="SEC13" href="#TOC1">REVISION</a><br>
<P>
Last updated: 13 September 2009
<br>
Copyright &copy; 1997-2009 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre_get_substring_list.html000064400000003457150403561470021055 0ustar00<html>
<head>
<title>pcre_get_substring_list specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_get_substring_list man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
<b>int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This is a convenience function for extracting a list of all the captured
substrings. The arguments are:
<pre>
  <i>subject</i>       Subject that has been successfully matched
  <i>ovector</i>       Offset vector that <b>pcre_exec</b> used
  <i>stringcount</i>   Value returned by <b>pcre_exec</b>
  <i>listptr</i>       Where to put a pointer to the list
</pre>
The memory in which the substrings and the list are placed is obtained by
calling <b>pcre_malloc()</b>. The convenience function
<b>pcre_free_substring_list()</b> can be used to free it when it is no longer
needed. A pointer to a list of pointers is put in the variable whose address is
in <i>listptr</i>. The list is terminated by a NULL pointer. The yield of the
function is zero on success or PCRE_ERROR_NOMEMORY if sufficient memory could
not be obtained.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre_copy_named_substring.html000064400000003400150403561470021345 0ustar00<html>
<head>
<title>pcre_copy_named_substring specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_copy_named_substring man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
<b>char *<i>buffer</i>, int <i>buffersize</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This is a convenience function for extracting a captured substring, identified
by name, into a given buffer. The arguments are:
<pre>
  <i>code</i>          Pattern that was successfully matched
  <i>subject</i>       Subject that has been successfully matched
  <i>ovector</i>       Offset vector that <b>pcre_exec()</b> used
  <i>stringcount</i>   Value returned by <b>pcre_exec()</b>
  <i>stringname</i>    Name of the required substring
  <i>buffer</i>        Buffer to receive the string
  <i>buffersize</i>    Size of buffer
</pre>
The yield is the length of the substring, PCRE_ERROR_NOMEMORY if the buffer was
too small, or PCRE_ERROR_NOSUBSTRING if the string name is invalid.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre_dfa_exec.html000064400000010710150403561470016667 0ustar00<html>
<head>
<title>pcre_dfa_exec specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_dfa_exec man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function matches a compiled regular expression against a given subject
string, using an alternative matching algorithm that scans the subject string
just once (<i>not</i> Perl-compatible). Note that the main, Perl-compatible,
matching function is <b>pcre_exec()</b>. The arguments for this function are:
<pre>
  <i>code</i>         Points to the compiled pattern
  <i>extra</i>        Points to an associated <b>pcre_extra</b> structure,
                 or is NULL
  <i>subject</i>      Points to the subject string
  <i>length</i>       Length of the subject string, in bytes
  <i>startoffset</i>  Offset in bytes in the subject at which to
                 start matching
  <i>options</i>      Option bits
  <i>ovector</i>      Points to a vector of ints for result offsets
  <i>ovecsize</i>     Number of elements in the vector
  <i>workspace</i>    Points to a vector of ints used as working space
  <i>wscount</i>      Number of elements in the vector
</pre>
The options are:
<pre>
  PCRE_ANCHORED          Match only at the first position
  PCRE_BSR_ANYCRLF       \R matches only CR, LF, or CRLF
  PCRE_BSR_UNICODE       \R matches all Unicode line endings
  PCRE_NEWLINE_ANY       Recognize any Unicode newline sequence
  PCRE_NEWLINE_ANYCRLF   Recognize CR, LF, &amp; CRLF as newline sequences
  PCRE_NEWLINE_CR        Recognize CR as the only newline sequence
  PCRE_NEWLINE_CRLF      Recognize CRLF as the only newline sequence
  PCRE_NEWLINE_LF        Recognize LF as the only newline sequence
  PCRE_NOTBOL            Subject is not the beginning of a line
  PCRE_NOTEOL            Subject is not the end of a line
  PCRE_NOTEMPTY          An empty string is not a valid match
  PCRE_NOTEMPTY_ATSTART  An empty string at the start of the subject
                           is not a valid match
  PCRE_NO_START_OPTIMIZE Do not do "start-match" optimizations
  PCRE_NO_UTF8_CHECK     Do not check the subject for UTF-8
                           validity (only relevant if PCRE_UTF8
                           was set at compile time)
  PCRE_PARTIAL           ) Return PCRE_ERROR_PARTIAL for a partial
  PCRE_PARTIAL_SOFT      )   match if no full matches are found
  PCRE_PARTIAL_HARD      Return PCRE_ERROR_PARTIAL for a partial match
                           even if there is a full match as well
  PCRE_DFA_SHORTEST      Return only the shortest match
  PCRE_DFA_RESTART       Restart after a partial match
</pre>
There are restrictions on what may appear in a pattern when using this matching
function. Details are given in the
<a href="pcrematching.html"><b>pcrematching</b></a>
documentation. For details of partial matching, see the
<a href="pcrepartial.html"><b>pcrepartial</b></a>
page.
</P>
<P>
A <b>pcre_extra</b> structure contains the following fields:
<pre>
  <i>flags</i>        Bits indicating which fields are set
  <i>study_data</i>   Opaque data from <b>pcre_study()</b>
  <i>match_limit</i>  Limit on internal resource use
  <i>match_limit_recursion</i>  Limit on internal recursion depth
  <i>callout_data</i> Opaque data passed back to callouts
  <i>tables</i>       Points to character tables or is NULL
</pre>
The flag bits are PCRE_EXTRA_STUDY_DATA, PCRE_EXTRA_MATCH_LIMIT,
PCRE_EXTRA_MATCH_LIMIT_RECURSION, PCRE_EXTRA_CALLOUT_DATA, and
PCRE_EXTRA_TABLES. For this matching function, the <i>match_limit</i> and
<i>match_limit_recursion</i> fields are not used, and must not be set.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre.html000064400000033133150403561470015055 0ustar00<html>
<head>
<title>pcre specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">INTRODUCTION</a>
<li><a name="TOC2" href="#SEC2">USER DOCUMENTATION</a>
<li><a name="TOC3" href="#SEC3">LIMITATIONS</a>
<li><a name="TOC4" href="#SEC4">UTF-8 AND UNICODE PROPERTY SUPPORT</a>
<li><a name="TOC5" href="#SEC5">AUTHOR</a>
<li><a name="TOC6" href="#SEC6">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">INTRODUCTION</a><br>
<P>
The PCRE library is a set of functions that implement regular expression
pattern matching using the same syntax and semantics as Perl, with just a few
differences. Some features that appeared in Python and PCRE before they
appeared in Perl are also available using the Python syntax, there is some
support for one or two .NET and Oniguruma syntax items, and there is an option
for requesting some minor changes that give better JavaScript compatibility.
</P>
<P>
The current implementation of PCRE corresponds approximately with Perl 5.10,
including support for UTF-8 encoded strings and Unicode general category
properties. However, UTF-8 and Unicode support has to be explicitly enabled; it
is not the default. The Unicode tables correspond to Unicode release 5.2.0.
</P>
<P>
In addition to the Perl-compatible matching function, PCRE contains an
alternative function that matches the same compiled patterns in a different
way. In certain circumstances, the alternative function has some advantages.
For a discussion of the two matching algorithms, see the
<a href="pcrematching.html"><b>pcrematching</b></a>
page.
</P>
<P>
PCRE is written in C and released as a C library. A number of people have
written wrappers and interfaces of various kinds. In particular, Google Inc.
have provided a comprehensive C++ wrapper. This is now included as part of the
PCRE distribution. The
<a href="pcrecpp.html"><b>pcrecpp</b></a>
page has details of this interface. Other people's contributions can be found
in the <i>Contrib</i> directory at the primary FTP site, which is:
<a href="ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre">ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre</a>
</P>
<P>
Details of exactly which Perl regular expression features are and are not
supported by PCRE are given in separate documents. See the
<a href="pcrepattern.html"><b>pcrepattern</b></a>
and
<a href="pcrecompat.html"><b>pcrecompat</b></a>
pages. There is a syntax summary in the
<a href="pcresyntax.html"><b>pcresyntax</b></a>
page.
</P>
<P>
Some features of PCRE can be included, excluded, or changed when the library is
built. The
<a href="pcre_config.html"><b>pcre_config()</b></a>
function makes it possible for a client to discover which features are
available. The features themselves are described in the
<a href="pcrebuild.html"><b>pcrebuild</b></a>
page. Documentation about building PCRE for various operating systems can be
found in the <b>README</b> and <b>NON-UNIX-USE</b> files in the source
distribution.
</P>
<P>
The library contains a number of undocumented internal functions and data
tables that are used by more than one of the exported external functions, but
which are not intended for use by external callers. Their names all begin with
"_pcre_", which hopefully will not provoke any name clashes. In some
environments, it is possible to control which external symbols are exported
when a shared library is built, and in these cases the undocumented symbols are
not exported.
</P>
<br><a name="SEC2" href="#TOC1">USER DOCUMENTATION</a><br>
<P>
The user documentation for PCRE comprises a number of different sections. In
the "man" format, each of these is a separate "man page". In the HTML format,
each is a separate page, linked from the index page. In the plain text format,
all the sections, except the <b>pcredemo</b> section, are concatenated, for ease
of searching. The sections are as follows:
<pre>
  pcre              this document
  pcre-config       show PCRE installation configuration information
  pcreapi           details of PCRE's native C API
  pcrebuild         options for building PCRE
  pcrecallout       details of the callout feature
  pcrecompat        discussion of Perl compatibility
  pcrecpp           details of the C++ wrapper
  pcredemo          a demonstration C program that uses PCRE
  pcregrep          description of the <b>pcregrep</b> command
  pcrematching      discussion of the two matching algorithms
  pcrepartial       details of the partial matching facility
  pcrepattern       syntax and semantics of supported regular expressions
  pcreperform       discussion of performance issues
  pcreposix         the POSIX-compatible C API
  pcreprecompile    details of saving and re-using precompiled patterns
  pcresample        discussion of the pcredemo program
  pcrestack         discussion of stack usage
  pcresyntax        quick syntax reference
  pcretest          description of the <b>pcretest</b> testing command
</pre>
In addition, in the "man" and HTML formats, there is a short page for each
C library function, listing its arguments and results.
</P>
<br><a name="SEC3" href="#TOC1">LIMITATIONS</a><br>
<P>
There are some size limitations in PCRE but it is hoped that they will never in
practice be relevant.
</P>
<P>
The maximum length of a compiled pattern is 65539 (sic) bytes if PCRE is
compiled with the default internal linkage size of 2. If you want to process
regular expressions that are truly enormous, you can compile PCRE with an
internal linkage size of 3 or 4 (see the <b>README</b> file in the source
distribution and the
<a href="pcrebuild.html"><b>pcrebuild</b></a>
documentation for details). In these cases the limit is substantially larger.
However, the speed of execution is slower.
</P>
<P>
All values in repeating quantifiers must be less than 65536.
</P>
<P>
There is no limit to the number of parenthesized subpatterns, but there can be
no more than 65535 capturing subpatterns.
</P>
<P>
The maximum length of name for a named subpattern is 32 characters, and the
maximum number of named subpatterns is 10000.
</P>
<P>
The maximum length of a subject string is the largest positive number that an
integer variable can hold. However, when using the traditional matching
function, PCRE uses recursion to handle subpatterns and indefinite repetition.
This means that the available stack space may limit the size of a subject
string that can be processed by certain patterns. For a discussion of stack
issues, see the
<a href="pcrestack.html"><b>pcrestack</b></a>
documentation.
<a name="utf8support"></a></P>
<br><a name="SEC4" href="#TOC1">UTF-8 AND UNICODE PROPERTY SUPPORT</a><br>
<P>
From release 3.3, PCRE has had some support for character strings encoded in
the UTF-8 format. For release 4.0 this was greatly extended to cover most
common requirements, and in release 5.0 additional support for Unicode general
category properties was added.
</P>
<P>
In order to process UTF-8 strings, you must build PCRE to include UTF-8 support in
the code, and, in addition, you must call
<a href="pcre_compile.html"><b>pcre_compile()</b></a>
with the PCRE_UTF8 option flag, or the pattern must start with the sequence
(*UTF8). When either of these is the case, both the pattern and any subject
strings that are matched against it are treated as UTF-8 strings instead of
strings of 1-byte characters.
</P>
<P>
If you compile PCRE with UTF-8 support, but do not use it at run time, the
library will be a bit bigger, but the additional run time overhead is limited
to testing the PCRE_UTF8 flag occasionally, so should not be very big.
</P>
<P>
If PCRE is built with Unicode character property support (which implies UTF-8
support), the escape sequences \p{..}, \P{..}, and \X are supported.
The available properties that can be tested are limited to the general
category properties such as Lu for an upper case letter or Nd for a decimal
number, the Unicode script names such as Arabic or Han, and the derived
properties Any and L&amp;. A full list is given in the
<a href="pcrepattern.html"><b>pcrepattern</b></a>
documentation. Only the short names for properties are supported. For example,
\p{L} matches a letter. Its Perl synonym, \p{Letter}, is not supported.
Furthermore, in Perl, many properties may optionally be prefixed by "Is", for
compatibility with Perl 5.6. PCRE does not support this.
<a name="utf8strings"></a></P>
<br><b>
Validity of UTF-8 strings
</b><br>
<P>
When you set the PCRE_UTF8 flag, the strings passed as patterns and subjects
are (by default) checked for validity on entry to the relevant functions. From
release 7.3 of PCRE, the check is according to the rules of RFC 3629, which are
themselves derived from the Unicode specification. Earlier releases of PCRE
followed the rules of RFC 2279, which allows the full range of 31-bit values (0
to 0x7FFFFFFF). The current check allows only values in the range U+0 to
U+10FFFF, excluding U+D800 to U+DFFF.
</P>
<P>
The excluded code points are the "Low Surrogate Area" of Unicode, of which the
Unicode Standard says this: "The Low Surrogate Area does not contain any
character assignments, consequently no character code charts or namelists are
provided for this area. Surrogates are reserved for use with UTF-16 and then
must be used in pairs." The code points that are encoded by UTF-16 pairs are
available as independent code points in the UTF-8 encoding. (In other words,
the whole surrogate thing is a fudge for UTF-16 which unfortunately messes up
UTF-8.)
</P>
<P>
If an invalid UTF-8 string is passed to PCRE, an error return
(PCRE_ERROR_BADUTF8) is given. In some situations, you may already know that
your strings are valid, and therefore want to skip these checks in order to
improve performance. If you set the PCRE_NO_UTF8_CHECK flag at compile time or
at run time, PCRE assumes that the pattern or subject it is given
(respectively) contains only valid UTF-8 codes. In this case, it does not
diagnose an invalid UTF-8 string.
</P>
<P>
If you pass an invalid UTF-8 string when PCRE_NO_UTF8_CHECK is set, what
happens depends on why the string is invalid. If the string conforms to the
"old" definition of UTF-8 (RFC 2279), it is processed as a string of characters
in the range 0 to 0x7FFFFFFF. In other words, apart from the initial validity
test, PCRE (when in UTF-8 mode) handles strings according to the more liberal
rules of RFC 2279. However, if the string does not even conform to RFC 2279,
the result is undefined. Your program may crash.
</P>
<P>
If you want to process strings of values in the full range 0 to 0x7FFFFFFF,
encoded in a UTF-8-like manner as per the old RFC, you can set
PCRE_NO_UTF8_CHECK to bypass the more restrictive test. However, in this
situation, you will have to apply your own validity check.
</P>
<br><b>
General comments about UTF-8 mode
</b><br>
<P>
1. An unbraced hexadecimal escape sequence (such as \xb3) matches a two-byte
UTF-8 character if the value is greater than 127.
</P>
<P>
2. Octal numbers up to \777 are recognized, and match two-byte UTF-8
characters for values greater than \177.
</P>
<P>
3. Repeat quantifiers apply to complete UTF-8 characters, not to individual
bytes, for example: \x{100}{3}.
</P>
<P>
4. The dot metacharacter matches one UTF-8 character instead of a single byte.
</P>
<P>
5. The escape sequence \C can be used to match a single byte in UTF-8 mode,
but its use can lead to some strange effects. This facility is not available in
the alternative matching function, <b>pcre_dfa_exec()</b>.
</P>
<P>
6. The character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly
test characters of any code value, but the characters that PCRE recognizes as
digits, spaces, or word characters remain the same set as before, all with
values less than 256. This remains true even when PCRE includes Unicode
property support, because to do otherwise would slow down PCRE in many common
cases. If you really want to test for a wider sense of, say, "digit", you
must use Unicode property tests such as \p{Nd}. Note that this also applies to
\b, because it is defined in terms of \w and \W.
</P>
<P>
7. Similarly, characters that match the POSIX named character classes are all
low-valued characters.
</P>
<P>
8. However, the Perl 5.10 horizontal and vertical whitespace matching escapes
(\h, \H, \v, and \V) do match all the appropriate Unicode characters.
</P>
<P>
9. Case-insensitive matching applies only to characters whose values are less
than 128, unless PCRE is built with Unicode property support. Even when Unicode
property support is available, PCRE still uses its own character tables when
checking the case of low-valued characters, so as not to degrade performance.
The Unicode property information is used only for characters with higher
values. Even when Unicode property support is available, PCRE supports
case-insensitive matching only when there is a one-to-one mapping between a
letter's cases. There are a small number of many-to-one mappings in Unicode;
these are not supported by PCRE.
</P>
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<P>
Putting an actual email address here seems to have been a spam magnet, so I've
taken it away. If you want to email me, use my two initials, followed by the
two digits 10, at the domain cam.ac.uk.
</P>
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
<P>
Last updated: 01 March 2010
<br>
Copyright &copy; 1997-2010 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcreapi.html000064400000271071150403561470015554 0ustar00<html>
<head>
<title>pcreapi specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcreapi man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">PCRE NATIVE API</a>
<li><a name="TOC2" href="#SEC2">PCRE API OVERVIEW</a>
<li><a name="TOC3" href="#SEC3">NEWLINES</a>
<li><a name="TOC4" href="#SEC4">MULTITHREADING</a>
<li><a name="TOC5" href="#SEC5">SAVING PRECOMPILED PATTERNS FOR LATER USE</a>
<li><a name="TOC6" href="#SEC6">CHECKING BUILD-TIME OPTIONS</a>
<li><a name="TOC7" href="#SEC7">COMPILING A PATTERN</a>
<li><a name="TOC8" href="#SEC8">COMPILATION ERROR CODES</a>
<li><a name="TOC9" href="#SEC9">STUDYING A PATTERN</a>
<li><a name="TOC10" href="#SEC10">LOCALE SUPPORT</a>
<li><a name="TOC11" href="#SEC11">INFORMATION ABOUT A PATTERN</a>
<li><a name="TOC12" href="#SEC12">OBSOLETE INFO FUNCTION</a>
<li><a name="TOC13" href="#SEC13">REFERENCE COUNTS</a>
<li><a name="TOC14" href="#SEC14">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a>
<li><a name="TOC15" href="#SEC15">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a>
<li><a name="TOC16" href="#SEC16">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a>
<li><a name="TOC17" href="#SEC17">DUPLICATE SUBPATTERN NAMES</a>
<li><a name="TOC18" href="#SEC18">FINDING ALL POSSIBLE MATCHES</a>
<li><a name="TOC19" href="#SEC19">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a>
<li><a name="TOC20" href="#SEC20">SEE ALSO</a>
<li><a name="TOC21" href="#SEC21">AUTHOR</a>
<li><a name="TOC22" href="#SEC22">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">PCRE NATIVE API</a><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b>
</P>
<P>
<b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
<b>int *<i>errorcodeptr</i>,</b>
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b>
</P>
<P>
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>);</b>
</P>
<P>
<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
</P>
<P>
<b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
</P>
<P>
<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
<b>char *<i>buffer</i>, int <i>buffersize</i>);</b>
</P>
<P>
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
<b>int <i>buffersize</i>);</b>
</P>
<P>
<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
<b>const char **<i>stringptr</i>);</b>
</P>
<P>
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
<b>const char *<i>name</i>);</b>
</P>
<P>
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
<b>const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
</P>
<P>
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
<b>const char **<i>stringptr</i>);</b>
</P>
<P>
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
<b>int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
</P>
<P>
<b>void pcre_free_substring(const char *<i>stringptr</i>);</b>
</P>
<P>
<b>void pcre_free_substring_list(const char **<i>stringptr</i>);</b>
</P>
<P>
<b>const unsigned char *pcre_maketables(void);</b>
</P>
<P>
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
<b>int <i>what</i>, void *<i>where</i>);</b>
</P>
<P>
<b>int pcre_info(const pcre *<i>code</i>, int *<i>optptr</i>, int</b>
<b>*<i>firstcharptr</i>);</b>
</P>
<P>
<b>int pcre_refcount(pcre *<i>code</i>, int <i>adjust</i>);</b>
</P>
<P>
<b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>
</P>
<P>
<b>char *pcre_version(void);</b>
</P>
<P>
<b>void *(*pcre_malloc)(size_t);</b>
</P>
<P>
<b>void (*pcre_free)(void *);</b>
</P>
<P>
<b>void *(*pcre_stack_malloc)(size_t);</b>
</P>
<P>
<b>void (*pcre_stack_free)(void *);</b>
</P>
<P>
<b>int (*pcre_callout)(pcre_callout_block *);</b>
</P>
<br><a name="SEC2" href="#TOC1">PCRE API OVERVIEW</a><br>
<P>
PCRE has its own native API, which is described in this document. There are
also some wrapper functions that correspond to the POSIX regular expression
API. These are described in the
<a href="pcreposix.html"><b>pcreposix</b></a>
documentation. Both of these APIs define a set of C function calls. A C++
wrapper is distributed with PCRE. It is documented in the
<a href="pcrecpp.html"><b>pcrecpp</b></a>
page.
</P>
<P>
The native API C function prototypes are defined in the header file
<b>pcre.h</b>, and on Unix systems the library itself is called <b>libpcre</b>.
It can normally be accessed by adding <b>-lpcre</b> to the command for linking
an application that uses PCRE. The header file defines the macros PCRE_MAJOR
and PCRE_MINOR to contain the major and minor release numbers for the library.
Applications can use these to include support for different releases of PCRE.
</P>
<P>
The functions <b>pcre_compile()</b>, <b>pcre_compile2()</b>, <b>pcre_study()</b>,
and <b>pcre_exec()</b> are used for compiling and matching regular expressions
in a Perl-compatible manner. A sample program that demonstrates the simplest
way of using them is provided in the file called <i>pcredemo.c</i> in the PCRE
source distribution. A listing of this program is given in the
<a href="pcredemo.html"><b>pcredemo</b></a>
documentation, and the
<a href="pcresample.html"><b>pcresample</b></a>
documentation describes how to compile and run it.
</P>
<P>
A second matching function, <b>pcre_dfa_exec()</b>, which is not
Perl-compatible, is also provided. This uses a different algorithm for the
matching. The alternative algorithm finds all possible matches (at a given
point in the subject), and scans the subject just once (unless there are
lookbehind assertions). However, this algorithm does not return captured
substrings. A description of the two matching algorithms and their advantages
and disadvantages is given in the
<a href="pcrematching.html"><b>pcrematching</b></a>
documentation.
</P>
<P>
In addition to the main compiling and matching functions, there are convenience
functions for extracting captured substrings from a subject string that is
matched by <b>pcre_exec()</b>. They are:
<pre>
  <b>pcre_copy_substring()</b>
  <b>pcre_copy_named_substring()</b>
  <b>pcre_get_substring()</b>
  <b>pcre_get_named_substring()</b>
  <b>pcre_get_substring_list()</b>
  <b>pcre_get_stringnumber()</b>
  <b>pcre_get_stringtable_entries()</b>
</pre>
<b>pcre_free_substring()</b> and <b>pcre_free_substring_list()</b> are also
provided, to free the memory used for extracted strings.
</P>
<P>
The function <b>pcre_maketables()</b> is used to build a set of character tables
in the current locale for passing to <b>pcre_compile()</b>, <b>pcre_exec()</b>,
or <b>pcre_dfa_exec()</b>. This is an optional facility that is provided for
specialist use. Most commonly, no special tables are passed, in which case
internal tables that are generated when PCRE is built are used.
</P>
<P>
The function <b>pcre_fullinfo()</b> is used to find out information about a
compiled pattern; <b>pcre_info()</b> is an obsolete version that returns only
some of the available information, but is retained for backwards compatibility.
The function <b>pcre_version()</b> returns a pointer to a string containing the
version of PCRE and its date of release.
</P>
<P>
The function <b>pcre_refcount()</b> maintains a reference count in a data block
containing a compiled pattern. This is provided for the benefit of
object-oriented applications.
</P>
<P>
The global variables <b>pcre_malloc</b> and <b>pcre_free</b> initially contain
the entry points of the standard <b>malloc()</b> and <b>free()</b> functions,
respectively. PCRE calls the memory management functions via these variables,
so a calling program can replace them if it wishes to intercept the calls. This
should be done before calling any PCRE functions.
</P>
<P>
The global variables <b>pcre_stack_malloc</b> and <b>pcre_stack_free</b> are also
indirections to memory management functions. These special functions are used
only when PCRE is compiled to use the heap for remembering data, instead of
recursive function calls, when running the <b>pcre_exec()</b> function. See the
<a href="pcrebuild.html"><b>pcrebuild</b></a>
documentation for details of how to do this. It is a non-standard way of
building PCRE, for use in environments that have limited stacks. Because of the
greater use of memory management, it runs more slowly. Separate functions are
provided so that special-purpose external code can be used for this case. When
used, these functions are always called in a stack-like manner (last obtained,
first freed), and always for memory blocks of the same size. There is a
discussion about PCRE's stack usage in the
<a href="pcrestack.html"><b>pcrestack</b></a>
documentation.
</P>
<P>
The global variable <b>pcre_callout</b> initially contains NULL. It can be set
by the caller to a "callout" function, which PCRE will then call at specified
points during a matching operation. Details are given in the
<a href="pcrecallout.html"><b>pcrecallout</b></a>
documentation.
<a name="newlines"></a></P>
<br><a name="SEC3" href="#TOC1">NEWLINES</a><br>
<P>
PCRE supports five different conventions for indicating line breaks in
strings: a single CR (carriage return) character, a single LF (linefeed)
character, the two-character sequence CRLF, any of the three preceding, or any
Unicode newline sequence. The Unicode newline sequences are the three just
mentioned, plus the single characters VT (vertical tab, U+000B), FF (formfeed,
U+000C), NEL (next line, U+0085), LS (line separator, U+2028), and PS
(paragraph separator, U+2029).
</P>
<P>
Each of the first three conventions is used by at least one operating system as
its standard newline sequence. When PCRE is built, a default can be specified.
The default default is LF, which is the Unix standard. When PCRE is run, the
default can be overridden, either when a pattern is compiled, or when it is
matched.
</P>
<P>
At compile time, the newline convention can be specified by the <i>options</i>
argument of <b>pcre_compile()</b>, or it can be specified by special text at the
start of the pattern itself; this overrides any other settings. See the
<a href="pcrepattern.html"><b>pcrepattern</b></a>
page for details of the special character sequences.
</P>
<P>
In the PCRE documentation the word "newline" is used to mean "the character or
pair of characters that indicate a line break". The choice of newline
convention affects the handling of the dot, circumflex, and dollar
metacharacters, the handling of #-comments in /x mode, and, when CRLF is a
recognized line ending sequence, the match position advancement for a
non-anchored pattern. There is more detail about this in the
<a href="#execoptions">section on <b>pcre_exec()</b> options</a>
below.
</P>
<P>
The choice of newline convention does not affect the interpretation of
the \n or \r escape sequences, nor does it affect what \R matches, which is
controlled in a similar way, but by separate options.
</P>
<br><a name="SEC4" href="#TOC1">MULTITHREADING</a><br>
<P>
The PCRE functions can be used in multi-threading applications, with the
proviso that the memory management functions pointed to by <b>pcre_malloc</b>,
<b>pcre_free</b>, <b>pcre_stack_malloc</b>, and <b>pcre_stack_free</b>, and the
callout function pointed to by <b>pcre_callout</b>, are shared by all threads.
</P>
<P>
The compiled form of a regular expression is not altered during matching, so
the same compiled pattern can safely be used by several threads at once.
</P>
<br><a name="SEC5" href="#TOC1">SAVING PRECOMPILED PATTERNS FOR LATER USE</a><br>
<P>
The compiled form of a regular expression can be saved and re-used at a later
time, possibly by a different program, and even on a host other than the one on
which it was compiled. Details are given in the
<a href="pcreprecompile.html"><b>pcreprecompile</b></a>
documentation. However, compiling a regular expression with one version of PCRE
for use with a different version is not guaranteed to work and may cause
crashes.
</P>
<br><a name="SEC6" href="#TOC1">CHECKING BUILD-TIME OPTIONS</a><br>
<P>
<b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>
</P>
<P>
The function <b>pcre_config()</b> makes it possible for a PCRE client to
discover which optional features have been compiled into the PCRE library. The
<a href="pcrebuild.html"><b>pcrebuild</b></a>
documentation has more details about these optional features.
</P>
<P>
The first argument for <b>pcre_config()</b> is an integer, specifying which
information is required; the second argument is a pointer to a variable into
which the information is placed. The following information is available:
<pre>
  PCRE_CONFIG_UTF8
</pre>
The output is an integer that is set to one if UTF-8 support is available;
otherwise it is set to zero.
<pre>
  PCRE_CONFIG_UNICODE_PROPERTIES
</pre>
The output is an integer that is set to one if support for Unicode character
properties is available; otherwise it is set to zero.
<pre>
  PCRE_CONFIG_NEWLINE
</pre>
The output is an integer whose value specifies the default character sequence
that is recognized as meaning "newline". The five values that are supported
are: 10 for LF, 13 for CR, 3338 for CRLF, -2 for ANYCRLF, and -1 for ANY.
Though they are derived from ASCII, the same values are returned in EBCDIC
environments. The default should normally correspond to the standard sequence
for your operating system.
<pre>
  PCRE_CONFIG_BSR
</pre>
The output is an integer whose value indicates what character sequences the \R
escape sequence matches by default. A value of 0 means that \R matches any
Unicode line ending sequence; a value of 1 means that \R matches only CR, LF,
or CRLF. The default can be overridden when a pattern is compiled or matched.
<pre>
  PCRE_CONFIG_LINK_SIZE
</pre>
The output is an integer that contains the number of bytes used for internal
linkage in compiled regular expressions. The value is 2, 3, or 4. Larger values
allow larger regular expressions to be compiled, at the expense of slower
matching. The default value of 2 is sufficient for all but the most massive
patterns, since it allows the compiled pattern to be up to 64K in size.
<pre>
  PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
</pre>
The output is an integer that contains the threshold above which the POSIX
interface uses <b>malloc()</b> for output vectors. Further details are given in
the
<a href="pcreposix.html"><b>pcreposix</b></a>
documentation.
<pre>
  PCRE_CONFIG_MATCH_LIMIT
</pre>
The output is a long integer that gives the default limit for the number of
internal matching function calls in a <b>pcre_exec()</b> execution. Further
details are given with <b>pcre_exec()</b> below.
<pre>
  PCRE_CONFIG_MATCH_LIMIT_RECURSION
</pre>
The output is a long integer that gives the default limit for the depth of
recursion when calling the internal matching function in a <b>pcre_exec()</b>
execution. Further details are given with <b>pcre_exec()</b> below.
<pre>
  PCRE_CONFIG_STACKRECURSE
</pre>
The output is an integer that is set to one if internal recursion when running
<b>pcre_exec()</b> is implemented by recursive function calls that use the stack
to remember their state. This is the usual way that PCRE is compiled. The
output is zero if PCRE was compiled to use blocks of data on the heap instead
of recursive function calls. In this case, <b>pcre_stack_malloc</b> and
<b>pcre_stack_free</b> are called to manage memory blocks on the heap, thus
avoiding the use of the stack.
</P>
<br><a name="SEC7" href="#TOC1">COMPILING A PATTERN</a><br>
<P>
<b>pcre *pcre_compile(const char *<i>pattern</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b>
<b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
<b>int *<i>errorcodeptr</i>,</b>
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b>
</P>
<P>
Either of the functions <b>pcre_compile()</b> or <b>pcre_compile2()</b> can be
called to compile a pattern into an internal form. The only difference between
the two interfaces is that <b>pcre_compile2()</b> has an additional argument,
<i>errorcodeptr</i>, via which a numerical error code can be returned. To avoid
too much repetition, we refer just to <b>pcre_compile()</b> below, but the
information applies equally to <b>pcre_compile2()</b>.
</P>
<P>
The pattern is a C string terminated by a binary zero, and is passed in the
<i>pattern</i> argument. A pointer to a single block of memory that is obtained
via <b>pcre_malloc</b> is returned. This contains the compiled code and related
data. The <b>pcre</b> type is defined for the returned block; this is a typedef
for a structure whose contents are not externally defined. It is up to the
caller to free the memory (via <b>pcre_free</b>) when it is no longer required.
</P>
<P>
Although the compiled code of a PCRE regex is relocatable, that is, it does not
depend on memory location, the complete <b>pcre</b> data block is not
fully relocatable, because it may contain a copy of the <i>tableptr</i>
argument, which is an address (see below).
</P>
<P>
The <i>options</i> argument contains various bit settings that affect the
compilation. It should be zero if no options are required. The available
options are described below. Some of them (in particular, those that are
compatible with Perl, but some others as well) can also be set and unset from
within the pattern (see the detailed description in the
<a href="pcrepattern.html"><b>pcrepattern</b></a>
documentation). For those options that can be different in different parts of
the pattern, the contents of the <i>options</i> argument specify their
settings at the start of compilation and execution. The PCRE_ANCHORED,
PCRE_BSR_<i>xxx</i>, and PCRE_NEWLINE_<i>xxx</i> options can be set at the time
of matching as well as at compile time.
</P>
<P>
If <i>errptr</i> is NULL, <b>pcre_compile()</b> returns NULL immediately.
Otherwise, if compilation of a pattern fails, <b>pcre_compile()</b> returns
NULL, and sets the variable pointed to by <i>errptr</i> to point to a textual
error message. This is a static string that is part of the library. You must
not try to free it. The byte offset from the start of the pattern to the
character that was being processed when the error was discovered is placed in
the variable pointed to by <i>erroffset</i>, which must not be NULL. If it is,
an immediate error is given. Some errors are not detected until checks are
carried out when the whole pattern has been scanned; in this case the offset is
set to the end of the pattern.
</P>
<P>
If <b>pcre_compile2()</b> is used instead of <b>pcre_compile()</b>, and the
<i>errorcodeptr</i> argument is not NULL, a non-zero error code number is
returned via this argument in the event of an error. This is in addition to the
textual error message. Error codes and messages are listed below.
</P>
<P>
If the final argument, <i>tableptr</i>, is NULL, PCRE uses a default set of
character tables that are built when PCRE is compiled, using the default C
locale. Otherwise, <i>tableptr</i> must be an address that is the result of a
call to <b>pcre_maketables()</b>. This value is stored with the compiled
pattern, and used again by <b>pcre_exec()</b>, unless another table pointer is
passed to it. For more discussion, see the section on locale support below.
</P>
<P>
This code fragment shows a typical straightforward call to <b>pcre_compile()</b>:
<pre>
  pcre *re;
  const char *error;
  int erroffset;
  re = pcre_compile(
    "^A.*Z",          /* the pattern */
    0,                /* default options */
    &error,           /* for error message */
    &erroffset,       /* for error offset */
    NULL);            /* use default character tables */
</pre>
The following names for option bits are defined in the <b>pcre.h</b> header
file:
<pre>
  PCRE_ANCHORED
</pre>
If this bit is set, the pattern is forced to be "anchored", that is, it is
constrained to match only at the first matching point in the string that is
being searched (the "subject string"). This effect can also be achieved by
appropriate constructs in the pattern itself, which is the only way to do it in
Perl.
<pre>
  PCRE_AUTO_CALLOUT
</pre>
If this bit is set, <b>pcre_compile()</b> automatically inserts callout items,
all with number 255, before each pattern item. For discussion of the callout
facility, see the
<a href="pcrecallout.html"><b>pcrecallout</b></a>
documentation.
<pre>
  PCRE_BSR_ANYCRLF
  PCRE_BSR_UNICODE
</pre>
These options (which are mutually exclusive) control what the \R escape
sequence matches. The choice is either to match only CR, LF, or CRLF, or to
match any Unicode newline sequence. The default is specified when PCRE is
built. It can be overridden from within the pattern, or by setting an option
when a compiled pattern is matched.
<pre>
  PCRE_CASELESS
</pre>
If this bit is set, letters in the pattern match both upper and lower case
letters. It is equivalent to Perl's /i option, and it can be changed within a
pattern by a (?i) option setting. In UTF-8 mode, PCRE always understands the
concept of case for characters whose values are less than 128, so caseless
matching is always possible. For characters with higher values, the concept of
case is supported if PCRE is compiled with Unicode property support, but not
otherwise. If you want to use caseless matching for characters 128 and above,
you must ensure that PCRE is compiled with Unicode property support as well as
with UTF-8 support.
<pre>
  PCRE_DOLLAR_ENDONLY
</pre>
If this bit is set, a dollar metacharacter in the pattern matches only at the
end of the subject string. Without this option, a dollar also matches
immediately before a newline at the end of the string (but not before any other
newlines). The PCRE_DOLLAR_ENDONLY option is ignored if PCRE_MULTILINE is set.
There is no equivalent to this option in Perl, and no way to set it within a
pattern.
<pre>
  PCRE_DOTALL
</pre>
If this bit is set, a dot metacharacter in the pattern matches all characters,
including those that indicate newline. Without it, a dot does not match when
the current position is at a newline. This option is equivalent to Perl's /s
option, and it can be changed within a pattern by a (?s) option setting. A
negative class such as [^a] always matches newline characters, independent of
the setting of this option.
<pre>
  PCRE_DUPNAMES
</pre>
If this bit is set, names used to identify capturing subpatterns need not be
unique. This can be helpful for certain types of pattern when it is known that
only one instance of the named subpattern can ever be matched. There are more
details of named subpatterns below; see also the
<a href="pcrepattern.html"><b>pcrepattern</b></a>
documentation.
<pre>
  PCRE_EXTENDED
</pre>
If this bit is set, whitespace data characters in the pattern are totally
ignored except when escaped or inside a character class. Whitespace does not
include the VT character (code 11). In addition, characters between an
unescaped # outside a character class and the next newline, inclusive, are also
ignored. This is equivalent to Perl's /x option, and it can be changed within a
pattern by a (?x) option setting.
</P>
<P>
This option makes it possible to include comments inside complicated patterns.
Note, however, that this applies only to data characters. Whitespace characters
may never appear within special character sequences in a pattern, for example
within the sequence (?( which introduces a conditional subpattern.
<pre>
  PCRE_EXTRA
</pre>
This option was invented in order to turn on additional functionality of PCRE
that is incompatible with Perl, but it is currently of very little use. When
set, any backslash in a pattern that is followed by a letter that has no
special meaning causes an error, thus reserving these combinations for future
expansion. By default, as in Perl, a backslash followed by a letter with no
special meaning is treated as a literal. (Perl can, however, be persuaded to
give a warning for this.) There are at present no other features controlled by
this option. It can also be set by a (?X) option setting within a pattern.
<pre>
  PCRE_FIRSTLINE
</pre>
If this option is set, an unanchored pattern is required to match before or at
the first newline in the subject string, though the matched text may continue
over the newline.
<pre>
  PCRE_JAVASCRIPT_COMPAT
</pre>
If this option is set, PCRE's behaviour is changed in some ways so that it is
compatible with JavaScript rather than Perl. The changes are as follows:
</P>
<P>
(1) A lone closing square bracket in a pattern causes a compile-time error,
because this is illegal in JavaScript (by default it is treated as a data
character). Thus, the pattern AB]CD becomes illegal when this option is set.
</P>
<P>
(2) At run time, a back reference to an unset subpattern group matches an empty
string (by default this causes the current matching alternative to fail). A
pattern such as (\1)(a) succeeds when this option is set (assuming it can find
an "a" in the subject), whereas it fails by default, for Perl compatibility.
<pre>
  PCRE_MULTILINE
</pre>
By default, PCRE treats the subject string as consisting of a single line of
characters (even if it actually contains newlines). The "start of line"
metacharacter (^) matches only at the start of the string, while the "end of
line" metacharacter ($) matches only at the end of the string, or before a
terminating newline (unless PCRE_DOLLAR_ENDONLY is set). This is the same as
Perl.
</P>
<P>
When PCRE_MULTILINE is set, the "start of line" and "end of line" constructs
match immediately following or immediately before internal newlines in the
subject string, respectively, as well as at the very start and end. This is
equivalent to Perl's /m option, and it can be changed within a pattern by a
(?m) option setting. If there are no newlines in a subject string, or no
occurrences of ^ or $ in a pattern, setting PCRE_MULTILINE has no effect.
<pre>
  PCRE_NEWLINE_CR
  PCRE_NEWLINE_LF
  PCRE_NEWLINE_CRLF
  PCRE_NEWLINE_ANYCRLF
  PCRE_NEWLINE_ANY
</pre>
These options override the default newline definition that was chosen when PCRE
was built. Setting the first or the second specifies that a newline is
indicated by a single character (CR or LF, respectively). Setting
PCRE_NEWLINE_CRLF specifies that a newline is indicated by the two-character
CRLF sequence. Setting PCRE_NEWLINE_ANYCRLF specifies that any of the three
preceding sequences should be recognized. Setting PCRE_NEWLINE_ANY specifies
that any Unicode newline sequence should be recognized. The Unicode newline
sequences are the three just mentioned, plus the single characters VT (vertical
tab, U+000B), FF (formfeed, U+000C), NEL (next line, U+0085), LS (line
separator, U+2028), and PS (paragraph separator, U+2029). The last two are
recognized only in UTF-8 mode.
</P>
<P>
The newline setting in the options word uses three bits that are treated
as a number, giving eight possibilities. Currently only six are used (default
plus the five values above). This means that if you set more than one newline
option, the combination may or may not be sensible. For example,
PCRE_NEWLINE_CR with PCRE_NEWLINE_LF is equivalent to PCRE_NEWLINE_CRLF, but
other combinations may yield unused numbers and cause an error.
</P>
<P>
The only time that a line break is specially recognized when compiling a
pattern is if PCRE_EXTENDED is set, and an unescaped # outside a character
class is encountered. This indicates a comment that lasts until after the next
line break sequence. In other circumstances, line break sequences are treated
as literal data, except that in PCRE_EXTENDED mode, both CR and LF are treated
as whitespace characters and are therefore ignored.
</P>
<P>
The newline option that is set at compile time becomes the default that is used
for <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b>, but it can be overridden.
<pre>
  PCRE_NO_AUTO_CAPTURE
</pre>
If this option is set, it disables the use of numbered capturing parentheses in
the pattern. Any opening parenthesis that is not followed by ? behaves as if it
were followed by ?: but named parentheses can still be used for capturing (and
they acquire numbers in the usual way). There is no equivalent of this option
in Perl.
<pre>
  PCRE_UNGREEDY
</pre>
This option inverts the "greediness" of the quantifiers so that they are not
greedy by default, but become greedy if followed by "?". It is not compatible
with Perl. It can also be set by a (?U) option setting within the pattern.
<pre>
  PCRE_UTF8
</pre>
This option causes PCRE to regard both the pattern and the subject as strings
of UTF-8 characters instead of single-byte character strings. However, it is
available only when PCRE is built to include UTF-8 support. If not, the use
of this option provokes an error. Details of how this option changes the
behaviour of PCRE are given in the
<a href="pcre.html#utf8support">section on UTF-8 support</a>
in the main
<a href="pcre.html"><b>pcre</b></a>
page.
<pre>
  PCRE_NO_UTF8_CHECK
</pre>
When PCRE_UTF8 is set, the validity of the pattern as a UTF-8 string is
automatically checked. There is a discussion about the
<a href="pcre.html#utf8strings">validity of UTF-8 strings</a>
in the main
<a href="pcre.html"><b>pcre</b></a>
page. If an invalid UTF-8 sequence of bytes is found, <b>pcre_compile()</b>
returns an error. If you already know that your pattern is valid, and you want
to skip this check for performance reasons, you can set the PCRE_NO_UTF8_CHECK
option. When it is set, the effect of passing an invalid UTF-8 string as a
pattern is undefined. It may cause your program to crash. Note that this option
can also be passed to <b>pcre_exec()</b> and <b>pcre_dfa_exec()</b>, to suppress
the UTF-8 validity checking of subject strings.
</P>
<br><a name="SEC8" href="#TOC1">COMPILATION ERROR CODES</a><br>
<P>
The following table lists the error codes that may be returned by
<b>pcre_compile2()</b>, along with the error messages that may be returned by
both compiling functions. As PCRE has developed, some error codes have fallen
out of use. To avoid confusion, they have not been re-used.
<pre>
   0  no error
   1  \ at end of pattern
   2  \c at end of pattern
   3  unrecognized character follows \
   4  numbers out of order in {} quantifier
   5  number too big in {} quantifier
   6  missing terminating ] for character class
   7  invalid escape sequence in character class
   8  range out of order in character class
   9  nothing to repeat
  10  [this code is not in use]
  11  internal error: unexpected repeat
  12  unrecognized character after (? or (?-
  13  POSIX named classes are supported only within a class
  14  missing )
  15  reference to non-existent subpattern
  16  erroffset passed as NULL
  17  unknown option bit(s) set
  18  missing ) after comment
  19  [this code is not in use]
  20  regular expression is too large
  21  failed to get memory
  22  unmatched parentheses
  23  internal error: code overflow
  24  unrecognized character after (?&#60;
  25  lookbehind assertion is not fixed length
  26  malformed number or name after (?(
  27  conditional group contains more than two branches
  28  assertion expected after (?(
  29  (?R or (?[+-]digits must be followed by )
  30  unknown POSIX class name
  31  POSIX collating elements are not supported
  32  this version of PCRE is not compiled with PCRE_UTF8 support
  33  [this code is not in use]
  34  character value in \x{...} sequence is too large
  35  invalid condition (?(0)
  36  \C not allowed in lookbehind assertion
  37  PCRE does not support \L, \l, \N, \U, or \u
  38  number after (?C is &#62; 255
  39  closing ) for (?C expected
  40  recursive call could loop indefinitely
  41  unrecognized character after (?P
  42  syntax error in subpattern name (missing terminator)
  43  two named subpatterns have the same name
  44  invalid UTF-8 string
  45  support for \P, \p, and \X has not been compiled
  46  malformed \P or \p sequence
  47  unknown property name after \P or \p
  48  subpattern name is too long (maximum 32 characters)
  49  too many named subpatterns (maximum 10000)
  50  [this code is not in use]
  51  octal value is greater than \377 (not in UTF-8 mode)
  52  internal error: overran compiling workspace
  53  internal error: previously-checked referenced subpattern not found
  54  DEFINE group contains more than one branch
  55  repeating a DEFINE group is not allowed
  56  inconsistent NEWLINE options
  57  \g is not followed by a braced, angle-bracketed, or quoted
        name/number or by a plain number
  58  a numbered reference must not be zero
  59  (*VERB) with an argument is not supported
  60  (*VERB) not recognized
  61  number is too big
  62  subpattern name expected
  63  digit expected after (?+
  64  ] is an invalid data character in JavaScript compatibility mode
</pre>
The numbers 32 and 10000 in errors 48 and 49 are defaults; different values may
be used if the limits were changed when PCRE was built.
</P>
<br><a name="SEC9" href="#TOC1">STUDYING A PATTERN</a><br>
<P>
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>);</b>
</P>
<P>
If a compiled pattern is going to be used several times, it is worth spending
more time analyzing it in order to speed up the time taken for matching. The
function <b>pcre_study()</b> takes a pointer to a compiled pattern as its first
argument. If studying the pattern produces additional information that will
help speed up matching, <b>pcre_study()</b> returns a pointer to a
<b>pcre_extra</b> block, in which the <i>study_data</i> field points to the
results of the study.
</P>
<P>
The returned value from <b>pcre_study()</b> can be passed directly to
<b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>. However, a <b>pcre_extra</b> block
also contains other fields that can be set by the caller before the block is
passed; these are described
<a href="#extradata">below</a>
in the section on matching a pattern.
</P>
<P>
If studying the pattern does not produce any useful information,
<b>pcre_study()</b> returns NULL. In that circumstance, if the calling program
wants to pass any of the other fields to <b>pcre_exec()</b> or
<b>pcre_dfa_exec()</b>, it must set up its own <b>pcre_extra</b> block.
</P>
<P>
The second argument of <b>pcre_study()</b> contains option bits. At present, no
options are defined, and this argument should always be zero.
</P>
<P>
The third argument for <b>pcre_study()</b> is a pointer for an error message. If
studying succeeds (even if no data is returned), the variable it points to is
set to NULL. Otherwise it is set to point to a textual error message. This is a
static string that is part of the library. You must not try to free it. You
should test the error pointer for NULL after calling <b>pcre_study()</b>, to be
sure that it has run successfully.
</P>
<P>
This is a typical call to <b>pcre_study()</b>:
<pre>
  pcre_extra *pe;
  pe = pcre_study(
    re,             /* result of pcre_compile() */
    0,              /* no options exist */
    &error);        /* set to NULL or points to a message */
</pre>
Studying a pattern does two things: first, a lower bound for the length of
subject string that is needed to match the pattern is computed. This does not
mean that there are any strings of that length that match, but it does
guarantee that no shorter strings match. The value is used by
<b>pcre_exec()</b> and <b>pcre_dfa_exec()</b> to avoid wasting time by trying to
match strings that are shorter than the lower bound. You can find out the value
in a calling program via the <b>pcre_fullinfo()</b> function.
</P>
<P>
Studying a pattern is also useful for non-anchored patterns that do not have a
single fixed starting character. A bitmap of possible starting bytes is
created. This speeds up finding a position in the subject at which to start
matching.
<a name="localesupport"></a></P>
<br><a name="SEC10" href="#TOC1">LOCALE SUPPORT</a><br>
<P>
PCRE handles caseless matching, and determines whether characters are letters,
digits, or whatever, by reference to a set of tables, indexed by character
value. When running in UTF-8 mode, this applies only to characters with codes
less than 128. Higher-valued codes never match escapes such as \w or \d, but
can be tested with \p if PCRE is built with Unicode character property
support. The use of locales with Unicode is discouraged. If you are handling
characters with codes greater than 128, you should either use UTF-8 and
Unicode, or use locales, but not try to mix the two.
</P>
<P>
PCRE contains an internal set of tables that are used when the final argument
of <b>pcre_compile()</b> is NULL. These are sufficient for many applications.
Normally, the internal tables recognize only ASCII characters. However, when
PCRE is built, it is possible to cause the internal tables to be rebuilt in the
default "C" locale of the local system, which may cause them to be different.
</P>
<P>
The internal tables can always be overridden by tables supplied by the
application that calls PCRE. These may be created in a different locale from
the default. As more and more applications change to using Unicode, the need
for this locale support is expected to die away.
</P>
<P>
External tables are built by calling the <b>pcre_maketables()</b> function,
which has no arguments, in the relevant locale. The result can then be passed
to <b>pcre_compile()</b> or <b>pcre_exec()</b> as often as necessary. For
example, to build and use tables that are appropriate for the French locale
(where accented characters with values greater than 128 are treated as letters),
the following code could be used:
<pre>
  setlocale(LC_CTYPE, "fr_FR");
  tables = pcre_maketables();
  re = pcre_compile(..., tables);
</pre>
The locale name "fr_FR" is used on Linux and other Unix-like systems; if you
are using Windows, the name for the French locale is "french".
</P>
<P>
When <b>pcre_maketables()</b> runs, the tables are built in memory that is
obtained via <b>pcre_malloc</b>. It is the caller's responsibility to ensure
that the memory containing the tables remains available for as long as it is
needed.
</P>
<P>
The pointer that is passed to <b>pcre_compile()</b> is saved with the compiled
pattern, and the same tables are used via this pointer by <b>pcre_study()</b>
and normally also by <b>pcre_exec()</b>. Thus, by default, for any single
pattern, compilation, studying and matching all happen in the same locale, but
different patterns can be compiled in different locales.
</P>
<P>
It is possible to pass a table pointer or NULL (indicating the use of the
internal tables) to <b>pcre_exec()</b>. Although not intended for this purpose,
this facility could be used to match a pattern in a different locale from the
one in which it was compiled. Passing table pointers at run time is discussed
below in the section on matching a pattern.
</P>
<br><a name="SEC11" href="#TOC1">INFORMATION ABOUT A PATTERN</a><br>
<P>
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
<b>int <i>what</i>, void *<i>where</i>);</b>
</P>
<P>
The <b>pcre_fullinfo()</b> function returns information about a compiled
pattern. It replaces the obsolete <b>pcre_info()</b> function, which is
nevertheless retained for backwards compatibility (and is documented below).
</P>
<P>
The first argument for <b>pcre_fullinfo()</b> is a pointer to the compiled
pattern. The second argument is the result of <b>pcre_study()</b>, or NULL if
the pattern was not studied. The third argument specifies which piece of
information is required, and the fourth argument is a pointer to a variable
to receive the data. The yield of the function is zero for success, or one of
the following negative numbers:
<pre>
  PCRE_ERROR_NULL       the argument <i>code</i> was NULL
                        the argument <i>where</i> was NULL
  PCRE_ERROR_BADMAGIC   the "magic number" was not found
  PCRE_ERROR_BADOPTION  the value of <i>what</i> was invalid
</pre>
The "magic number" is placed at the start of each compiled pattern as a simple
check against passing an arbitrary memory pointer. Here is a typical call of
<b>pcre_fullinfo()</b>, to obtain the length of the compiled pattern:
<pre>
  int rc;
  size_t length;
  rc = pcre_fullinfo(
    re,               /* result of pcre_compile() */
    pe,               /* result of pcre_study(), or NULL */
    PCRE_INFO_SIZE,   /* what is required */
    &length);         /* where to put the data */
</pre>
The possible values for the third argument are defined in <b>pcre.h</b>, and are
as follows:
<pre>
  PCRE_INFO_BACKREFMAX
</pre>
Return the number of the highest back reference in the pattern. The fourth
argument should point to an <b>int</b> variable. Zero is returned if there are
no back references.
<pre>
  PCRE_INFO_CAPTURECOUNT
</pre>
Return the number of capturing subpatterns in the pattern. The fourth argument
should point to an <b>int</b> variable.
<pre>
  PCRE_INFO_DEFAULT_TABLES
</pre>
Return a pointer to the internal default character tables within PCRE. The
fourth argument should point to an <b>unsigned char *</b> variable. This
information call is provided for internal use by the <b>pcre_study()</b>
function. External callers can cause PCRE to use its internal tables by passing
a NULL table pointer.
<pre>
  PCRE_INFO_FIRSTBYTE
</pre>
Return information about the first byte of any matched string, for a
non-anchored pattern. The fourth argument should point to an <b>int</b>
variable. (This option used to be called PCRE_INFO_FIRSTCHAR; the old name is
still recognized for backwards compatibility.)
</P>
<P>
If there is a fixed first byte, for example, from a pattern such as
(cat|cow|coyote), its value is returned. Otherwise, if either
<br>
<br>
(a) the pattern was compiled with the PCRE_MULTILINE option, and every branch
starts with "^", or
<br>
<br>
(b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not set
(if it were set, the pattern would be anchored),
<br>
<br>
-1 is returned, indicating that the pattern matches only at the start of a
subject string or after any newline within the string. Otherwise -2 is
returned. For anchored patterns, -2 is returned.
<pre>
  PCRE_INFO_FIRSTTABLE
</pre>
If the pattern was studied, and this resulted in the construction of a 256-bit
table indicating a fixed set of bytes for the first byte in any matching
string, a pointer to the table is returned. Otherwise NULL is returned. The
fourth argument should point to an <b>unsigned char *</b> variable.
<pre>
  PCRE_INFO_HASCRORLF
</pre>
Return 1 if the pattern contains any explicit matches for CR or LF characters,
otherwise 0. The fourth argument should point to an <b>int</b> variable. An
explicit match is either a literal CR or LF character, or \r or \n.
<pre>
  PCRE_INFO_JCHANGED
</pre>
Return 1 if the (?J) or (?-J) option setting is used in the pattern, otherwise
0. The fourth argument should point to an <b>int</b> variable. (?J) and
(?-J) set and unset the local PCRE_DUPNAMES option, respectively.
<pre>
  PCRE_INFO_LASTLITERAL
</pre>
Return the value of the rightmost literal byte that must exist in any matched
string, other than at its start, if such a byte has been recorded. The fourth
argument should point to an <b>int</b> variable. If there is no such byte, -1 is
returned. For anchored patterns, a last literal byte is recorded only if it
follows something of variable length. For example, for the pattern
/^a\d+z\d+/ the returned value is "z", but for /^a\dz\d/ the returned value
is -1.
<pre>
  PCRE_INFO_MINLENGTH
</pre>
If the pattern was studied and a minimum length for matching subject strings
was computed, its value is returned. Otherwise the returned value is -1. The
value is a number of characters, not bytes (this may be relevant in UTF-8
mode). The fourth argument should point to an <b>int</b> variable. A
non-negative value is a lower bound to the length of any matching string. There
may not be any strings of that length that do actually match, but every string
that does match is at least that long.
<pre>
  PCRE_INFO_NAMECOUNT
  PCRE_INFO_NAMEENTRYSIZE
  PCRE_INFO_NAMETABLE
</pre>
PCRE supports the use of named as well as numbered capturing parentheses. The
names are just an additional way of identifying the parentheses, which still
acquire numbers. Several convenience functions such as
<b>pcre_get_named_substring()</b> are provided for extracting captured
substrings by name. It is also possible to extract the data directly, by first
converting the name to a number in order to access the correct pointers in the
output vector (described with <b>pcre_exec()</b> below). To do the conversion,
you need to use the name-to-number map, which is described by these three
values.
</P>
<P>
The map consists of a number of fixed-size entries. PCRE_INFO_NAMECOUNT gives
the number of entries, and PCRE_INFO_NAMEENTRYSIZE gives the size of each
entry; both of these return an <b>int</b> value. The entry size depends on the
length of the longest name. PCRE_INFO_NAMETABLE returns a pointer to the first
entry of the table (a pointer to <b>char</b>). The first two bytes of each entry
are the number of the capturing parenthesis, most significant byte first. The
rest of the entry is the corresponding name, zero terminated.
</P>
<P>
The names are in alphabetical order. Duplicate names may appear if (?| is used
to create multiple groups with the same number, as described in the
<a href="pcrepattern.html#dupsubpatternnumber">section on duplicate subpattern numbers</a>
in the
<a href="pcrepattern.html"><b>pcrepattern</b></a>
page. Duplicate names for subpatterns with different numbers are permitted only
if PCRE_DUPNAMES is set. In all cases of duplicate names, they appear in the
table in the order in which they were found in the pattern. In the absence of
(?| this is the order of increasing number; when (?| is used this is not
necessarily the case because later subpatterns may have lower numbers.
</P>
<P>
As a simple example of the name/number table, consider the following pattern
(assume PCRE_EXTENDED is set, so white space - including newlines - is
ignored):
<pre>
  (?&#60;date&#62; (?&#60;year&#62;(\d\d)?\d\d) - (?&#60;month&#62;\d\d) - (?&#60;day&#62;\d\d) )
</pre>
There are four named subpatterns, so the table has four entries, and each entry
in the table is eight bytes long. The table is as follows, with non-printing
bytes shown in hexadecimal, and undefined bytes shown as ??:
<pre>
  00 01 d  a  t  e  00 ??
  00 05 d  a  y  00 ?? ??
  00 04 m  o  n  t  h  00
  00 02 y  e  a  r  00 ??
</pre>
When writing code to extract data from named subpatterns using the
name-to-number map, remember that the length of the entries is likely to be
different for each compiled pattern.
<pre>
  PCRE_INFO_OKPARTIAL
</pre>
Return 1 if the pattern can be used for partial matching with
<b>pcre_exec()</b>, otherwise 0. The fourth argument should point to an
<b>int</b> variable. From release 8.00, this always returns 1, because the
restrictions that previously applied to partial matching have been lifted. The
<a href="pcrepartial.html"><b>pcrepartial</b></a>
documentation gives details of partial matching.
<pre>
  PCRE_INFO_OPTIONS
</pre>
Return a copy of the options with which the pattern was compiled. The fourth
argument should point to an <b>unsigned long int</b> variable. These option bits
are those specified in the call to <b>pcre_compile()</b>, modified by any
top-level option settings at the start of the pattern itself. In other words,
they are the options that will be in force when matching starts. For example,
if the pattern /(?im)abc(?-i)d/ is compiled with the PCRE_EXTENDED option, the
result is PCRE_CASELESS, PCRE_MULTILINE, and PCRE_EXTENDED.
</P>
<P>
A pattern is automatically anchored by PCRE if all of its top-level
alternatives begin with one of the following:
<pre>
  ^     unless PCRE_MULTILINE is set
  \A    always
  \G    always
  .*    if PCRE_DOTALL is set and there are no back references to the subpattern in which .* appears
</pre>
For such patterns, the PCRE_ANCHORED bit is set in the options returned by
<b>pcre_fullinfo()</b>.
<pre>
  PCRE_INFO_SIZE
</pre>
Return the size of the compiled pattern, that is, the value that was passed as
the argument to <b>pcre_malloc()</b> when PCRE was getting memory in which to
place the compiled data. The fourth argument should point to a <b>size_t</b>
variable.
<pre>
  PCRE_INFO_STUDYSIZE
</pre>
Return the size of the data block pointed to by the <i>study_data</i> field in
a <b>pcre_extra</b> block. That is, it is the value that was passed to
<b>pcre_malloc()</b> when PCRE was getting memory into which to place the data
created by <b>pcre_study()</b>. If <b>pcre_extra</b> is NULL, or there is no
study data, zero is returned. The fourth argument should point to a
<b>size_t</b> variable.
</P>
<br><a name="SEC12" href="#TOC1">OBSOLETE INFO FUNCTION</a><br>
<P>
<b>int pcre_info(const pcre *<i>code</i>, int *<i>optptr</i>, int</b>
<b>*<i>firstcharptr</i>);</b>
</P>
<P>
The <b>pcre_info()</b> function is now obsolete because its interface is too
restrictive to return all the available data about a compiled pattern. New
programs should use <b>pcre_fullinfo()</b> instead. The yield of
<b>pcre_info()</b> is the number of capturing subpatterns, or one of the
following negative numbers:
<pre>
  PCRE_ERROR_NULL       the argument <i>code</i> was NULL
  PCRE_ERROR_BADMAGIC   the "magic number" was not found
</pre>
If the <i>optptr</i> argument is not NULL, a copy of the options with which the
pattern was compiled is placed in the integer it points to (see
PCRE_INFO_OPTIONS above).
</P>
<P>
If the pattern is not anchored and the <i>firstcharptr</i> argument is not NULL,
it is used to pass back information about the first character of any matched
string (see PCRE_INFO_FIRSTBYTE above).
</P>
<br><a name="SEC13" href="#TOC1">REFERENCE COUNTS</a><br>
<P>
<b>int pcre_refcount(pcre *<i>code</i>, int <i>adjust</i>);</b>
</P>
<P>
The <b>pcre_refcount()</b> function is used to maintain a reference count in the
data block that contains a compiled pattern. It is provided for the benefit of
applications that operate in an object-oriented manner, where different parts
of the application may be using the same compiled pattern, but you want to free
the block when they are all done.
</P>
<P>
When a pattern is compiled, the reference count field is initialized to zero.
It is changed only by calling this function, whose action is to add the
<i>adjust</i> value (which may be positive or negative) to it. The yield of the
function is the new value. However, the value of the count is constrained to
lie between 0 and 65535, inclusive. If the new value is outside these limits,
it is forced to the appropriate limit value.
</P>
<P>
Except when it is zero, the reference count is not correctly preserved if a
pattern is compiled on one host and then transferred to a host whose byte-order
is different. (This seems a highly unlikely scenario.)
</P>
<br><a name="SEC14" href="#TOC1">MATCHING A PATTERN: THE TRADITIONAL FUNCTION</a><br>
<P>
<b>int pcre_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>);</b>
</P>
<P>
The function <b>pcre_exec()</b> is called to match a subject string against a
compiled pattern, which is passed in the <i>code</i> argument. If the
pattern was studied, the result of the study should be passed in the
<i>extra</i> argument. This function is the main matching facility of the
library, and it operates in a Perl-like manner. For specialist use there is
also an alternative matching function, which is described
<a href="#dfamatch">below</a>
in the section about the <b>pcre_dfa_exec()</b> function.
</P>
<P>
In most applications, the pattern will have been compiled (and optionally
studied) in the same process that calls <b>pcre_exec()</b>. However, it is
possible to save compiled patterns and study data, and then use them later
in different processes, possibly even on different hosts. For a discussion
about this, see the
<a href="pcreprecompile.html"><b>pcreprecompile</b></a>
documentation.
</P>
<P>
Here is an example of a simple call to <b>pcre_exec()</b>:
<pre>
  int rc;
  int ovector[30];
  rc = pcre_exec(
    re,             /* result of pcre_compile() */
    NULL,           /* we didn't study the pattern */
    "some string",  /* the subject string */
    11,             /* the length of the subject string */
    0,              /* start at offset 0 in the subject */
    0,              /* default options */
    ovector,        /* vector of integers for substring information */
    30);            /* number of elements (NOT size in bytes) */
<a name="extradata"></a></PRE>
</P>
<br><b>
Extra data for <b>pcre_exec()</b>
</b><br>
<P>
If the <i>extra</i> argument is not NULL, it must point to a <b>pcre_extra</b>
data block. The <b>pcre_study()</b> function returns such a block (when it
doesn't return NULL), but you can also create one for yourself, and pass
additional information in it. The <b>pcre_extra</b> block contains the following
fields (not necessarily in this order):
<pre>
  unsigned long int <i>flags</i>;
  void *<i>study_data</i>;
  unsigned long int <i>match_limit</i>;
  unsigned long int <i>match_limit_recursion</i>;
  void *<i>callout_data</i>;
  const unsigned char *<i>tables</i>;
</pre>
The <i>flags</i> field is a bitmap that specifies which of the other fields
are set. The flag bits are:
<pre>
  PCRE_EXTRA_STUDY_DATA
  PCRE_EXTRA_MATCH_LIMIT
  PCRE_EXTRA_MATCH_LIMIT_RECURSION
  PCRE_EXTRA_CALLOUT_DATA
  PCRE_EXTRA_TABLES
</pre>
Other flag bits should be set to zero. The <i>study_data</i> field is set in the
<b>pcre_extra</b> block that is returned by <b>pcre_study()</b>, together with
the appropriate flag bit. You should not set this yourself, but you may add to
the block by setting the other fields and their corresponding flag bits.
</P>
<P>
The <i>match_limit</i> field provides a means of preventing PCRE from using up a
vast amount of resources when running patterns that are not going to match,
but which have a very large number of possibilities in their search trees. The
classic example is a pattern that uses nested unlimited repeats.
</P>
<P>
Internally, PCRE uses a function called <b>match()</b> which it calls repeatedly
(sometimes recursively). The limit set by <i>match_limit</i> is imposed on the
number of times this function is called during a match, which has the effect of
limiting the amount of backtracking that can take place. For patterns that are
not anchored, the count restarts from zero for each position in the subject
string.
</P>
<P>
The default value for the limit can be set when PCRE is built; the default
default is 10 million, which handles all but the most extreme cases. You can
override the default by supplying <b>pcre_exec()</b> with a <b>pcre_extra</b>
block in which <i>match_limit</i> is set, and PCRE_EXTRA_MATCH_LIMIT is set in
the <i>flags</i> field. If the limit is exceeded, <b>pcre_exec()</b> returns
PCRE_ERROR_MATCHLIMIT.
</P>
<P>
The <i>match_limit_recursion</i> field is similar to <i>match_limit</i>, but
instead of limiting the total number of times that <b>match()</b> is called, it
limits the depth of recursion. The recursion depth is a smaller number than the
total number of calls, because not all calls to <b>match()</b> are recursive.
This limit is of use only if it is set smaller than <i>match_limit</i>.
</P>
<P>
Limiting the recursion depth limits the amount of stack that can be used, or,
when PCRE has been compiled to use memory on the heap instead of the stack, the
amount of heap memory that can be used.
</P>
<P>
The default value for <i>match_limit_recursion</i> can be set when PCRE is
built; the default default is the same value as the default for
<i>match_limit</i>. You can override the default by supplying <b>pcre_exec()</b>
with a <b>pcre_extra</b> block in which <i>match_limit_recursion</i> is set, and
PCRE_EXTRA_MATCH_LIMIT_RECURSION is set in the <i>flags</i> field. If the limit
is exceeded, <b>pcre_exec()</b> returns PCRE_ERROR_RECURSIONLIMIT.
</P>
<P>
The <i>callout_data</i> field is used in conjunction with the "callout" feature,
and is described in the
<a href="pcrecallout.html"><b>pcrecallout</b></a>
documentation.
</P>
<P>
The <i>tables</i> field is used to pass a character tables pointer to
<b>pcre_exec()</b>; this overrides the value that is stored with the compiled
pattern. A non-NULL value is stored with the compiled pattern only if custom
tables were supplied to <b>pcre_compile()</b> via its <i>tableptr</i> argument.
If NULL is passed to <b>pcre_exec()</b> using this mechanism, it forces PCRE's
internal tables to be used. This facility is helpful when re-using patterns
that have been saved after compiling with an external set of tables, because
the external tables might be at a different address when <b>pcre_exec()</b> is
called. See the
<a href="pcreprecompile.html"><b>pcreprecompile</b></a>
documentation for a discussion of saving compiled patterns for later use.
<a name="execoptions"></a></P>
<br><b>
Option bits for <b>pcre_exec()</b>
</b><br>
<P>
The unused bits of the <i>options</i> argument for <b>pcre_exec()</b> must be
zero. The only bits that may be set are PCRE_ANCHORED, PCRE_NEWLINE_<i>xxx</i>,
PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART,
PCRE_NO_START_OPTIMIZE, PCRE_NO_UTF8_CHECK, PCRE_PARTIAL_SOFT, and
PCRE_PARTIAL_HARD.
<pre>
  PCRE_ANCHORED
</pre>
The PCRE_ANCHORED option limits <b>pcre_exec()</b> to matching at the first
matching position. If a pattern was compiled with PCRE_ANCHORED, or turned out
to be anchored by virtue of its contents, it cannot be made unanchored at
matching time.
<pre>
  PCRE_BSR_ANYCRLF
  PCRE_BSR_UNICODE
</pre>
These options (which are mutually exclusive) control what the \R escape
sequence matches. The choice is either to match only CR, LF, or CRLF, or to
match any Unicode newline sequence. These options override the choice that was
made or defaulted when the pattern was compiled.
<pre>
  PCRE_NEWLINE_CR
  PCRE_NEWLINE_LF
  PCRE_NEWLINE_CRLF
  PCRE_NEWLINE_ANYCRLF
  PCRE_NEWLINE_ANY
</pre>
These options override the newline definition that was chosen or defaulted when
the pattern was compiled. For details, see the description of
<b>pcre_compile()</b> above. During matching, the newline choice affects the
behaviour of the dot, circumflex, and dollar metacharacters. It may also alter
the way the match position is advanced after a match failure for an unanchored
pattern.
</P>
<P>
When PCRE_NEWLINE_CRLF, PCRE_NEWLINE_ANYCRLF, or PCRE_NEWLINE_ANY is set, and a
match attempt for an unanchored pattern fails when the current position is at a
CRLF sequence, and the pattern contains no explicit matches for CR or LF
characters, the match position is advanced by two characters instead of one, in
other words, to after the CRLF.
</P>
<P>
The above rule is a compromise that makes the most common cases work as
expected. For example, if the pattern is .+A (and the PCRE_DOTALL option is not
set), it does not match the string "\r\nA" because, after failing at the
start, it skips both the CR and the LF before retrying. However, the pattern
[\r\n]A does match that string, because it contains an explicit CR or LF
reference, and so advances only by one character after the first failure.
</P>
<P>
An explicit match for CR or LF is either a literal appearance of one of those
characters, or one of the \r or \n escape sequences. Implicit matches such as
[^X] do not count, nor does \s (which includes CR and LF in the characters
that it matches).
</P>
<P>
Notwithstanding the above, anomalous effects may still occur when CRLF is a
valid newline sequence and explicit \r or \n escapes appear in the pattern.
<pre>
  PCRE_NOTBOL
</pre>
This option specifies that the first character of the subject string is not the
beginning of a line, so the circumflex metacharacter should not match before
it. Setting this without PCRE_MULTILINE (at compile time) causes circumflex
never to match. This option affects only the behaviour of the circumflex
metacharacter. It does not affect \A.
<pre>
  PCRE_NOTEOL
</pre>
This option specifies that the end of the subject string is not the end of a
line, so the dollar metacharacter should not match it nor (except in multiline
mode) a newline immediately before it. Setting this without PCRE_MULTILINE (at
compile time) causes dollar never to match. This option affects only the
behaviour of the dollar metacharacter. It does not affect \Z or \z.
<pre>
  PCRE_NOTEMPTY
</pre>
An empty string is not considered to be a valid match if this option is set. If
there are alternatives in the pattern, they are tried. If all the alternatives
match the empty string, the entire match fails. For example, if the pattern
<pre>
  a?b?
</pre>
is applied to a string not beginning with "a" or "b", it matches an empty
string at the start of the subject. With PCRE_NOTEMPTY set, this match is not
valid, so PCRE searches further into the string for occurrences of "a" or "b".
<pre>
  PCRE_NOTEMPTY_ATSTART
</pre>
This is like PCRE_NOTEMPTY, except that an empty string match that is not at
the start of the subject is permitted. If the pattern is anchored, such a match
can occur only if the pattern contains \K.
</P>
<P>
Perl has no direct equivalent of PCRE_NOTEMPTY or PCRE_NOTEMPTY_ATSTART, but it
does make a special case of a pattern match of the empty string within its
<b>split()</b> function, and when using the /g modifier. It is possible to
emulate Perl's behaviour after matching a null string by first trying the match
again at the same offset with PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED, and then
if that fails, by advancing the starting offset (see below) and trying an
ordinary match again. There is some code that demonstrates how to do this in
the
<a href="pcredemo.html"><b>pcredemo</b></a>
sample program.
<pre>
  PCRE_NO_START_OPTIMIZE
</pre>
There are a number of optimizations that <b>pcre_exec()</b> uses at the start of
a match, in order to speed up the process. For example, if it is known that a
match must start with a specific character, it searches the subject for that
character, and fails immediately if it cannot find it, without actually running
the main matching function. When callouts are in use, these optimizations can
cause them to be skipped. This option disables the "start-up" optimizations,
causing performance to suffer, but ensuring that the callouts do occur.
<pre>
  PCRE_NO_UTF8_CHECK
</pre>
When PCRE_UTF8 is set at compile time, the validity of the subject as a UTF-8
string is automatically checked when <b>pcre_exec()</b> is subsequently called.
The value of <i>startoffset</i> is also checked to ensure that it points to the
start of a UTF-8 character. There is a discussion about the validity of UTF-8
strings in the
<a href="pcre.html#utf8strings">section on UTF-8 support</a>
in the main
<a href="pcre.html"><b>pcre</b></a>
page. If an invalid UTF-8 sequence of bytes is found, <b>pcre_exec()</b> returns
the error PCRE_ERROR_BADUTF8. If <i>startoffset</i> contains an invalid value,
PCRE_ERROR_BADUTF8_OFFSET is returned.
</P>
<P>
If you already know that your subject is valid, and you want to skip these
checks for performance reasons, you can set the PCRE_NO_UTF8_CHECK option when
calling <b>pcre_exec()</b>. You might want to do this for the second and
subsequent calls to <b>pcre_exec()</b> if you are making repeated calls to find
all the matches in a single subject string. However, you should be sure that
the value of <i>startoffset</i> points to the start of a UTF-8 character. When
PCRE_NO_UTF8_CHECK is set, the effect of passing an invalid UTF-8 string as a
subject, or a value of <i>startoffset</i> that does not point to the start of a
UTF-8 character, is undefined. Your program may crash.
<pre>
  PCRE_PARTIAL_HARD
  PCRE_PARTIAL_SOFT
</pre>
These options turn on the partial matching feature. For backwards
compatibility, PCRE_PARTIAL is a synonym for PCRE_PARTIAL_SOFT. A partial match
occurs if the end of the subject string is reached successfully, but there are
not enough subject characters to complete the match. If this happens when
PCRE_PARTIAL_HARD is set, <b>pcre_exec()</b> immediately returns
PCRE_ERROR_PARTIAL. Otherwise, if PCRE_PARTIAL_SOFT is set, matching continues
by testing any other alternatives. Only if they all fail is PCRE_ERROR_PARTIAL
returned (instead of PCRE_ERROR_NOMATCH). The portion of the string that
was inspected when the partial match was found is set as the first matching
string. There is a more detailed discussion in the
<a href="pcrepartial.html"><b>pcrepartial</b></a>
documentation.
</P>
<br><b>
The string to be matched by <b>pcre_exec()</b>
</b><br>
<P>
The subject string is passed to <b>pcre_exec()</b> as a pointer in
<i>subject</i>, a length (in bytes) in <i>length</i>, and a starting byte offset
in <i>startoffset</i>. In UTF-8 mode, the byte offset must point to the start of
a UTF-8 character. Unlike the pattern string, the subject may contain binary
zero bytes. When the starting offset is zero, the search for a match starts at
the beginning of the subject, and this is by far the most common case.
</P>
<P>
A non-zero starting offset is useful when searching for another match in the
same subject by calling <b>pcre_exec()</b> again after a previous success.
Setting <i>startoffset</i> differs from just passing over a shortened string and
setting PCRE_NOTBOL in the case of a pattern that begins with any kind of
lookbehind. For example, consider the pattern
<pre>
  \Biss\B
</pre>
which finds occurrences of "iss" in the middle of words. (\B matches only if
the current position in the subject is not a word boundary.) When applied to
the string "Mississippi" the first call to <b>pcre_exec()</b> finds the first
occurrence. If <b>pcre_exec()</b> is called again with just the remainder of the
subject, namely "issippi", it does not match, because \B is always false at the
start of the subject, which is deemed to be a word boundary. However, if
<b>pcre_exec()</b> is passed the entire string again, but with <i>startoffset</i>
set to 4, it finds the second occurrence of "iss" because it is able to look
behind the starting point to discover that it is preceded by a letter.
</P>
<P>
If a non-zero starting offset is passed when the pattern is anchored, one
attempt to match at the given offset is made. This can only succeed if the
pattern does not require the match to be at the start of the subject.
</P>
<br><b>
How <b>pcre_exec()</b> returns captured substrings
</b><br>
<P>
In general, a pattern matches a certain portion of the subject, and in
addition, further substrings from the subject may be picked out by parts of the
pattern. Following the usage in Jeffrey Friedl's book, this is called
"capturing" in what follows, and the phrase "capturing subpattern" is used for
a fragment of a pattern that picks out a substring. PCRE supports several other
kinds of parenthesized subpattern that do not cause substrings to be captured.
</P>
<P>
Captured substrings are returned to the caller via a vector of integers whose
address is passed in <i>ovector</i>. The number of elements in the vector is
passed in <i>ovecsize</i>, which must be a non-negative number. <b>Note</b>: this
argument is NOT the size of <i>ovector</i> in bytes.
</P>
<P>
The first two-thirds of the vector is used to pass back captured substrings,
each substring using a pair of integers. The remaining third of the vector is
used as workspace by <b>pcre_exec()</b> while matching capturing subpatterns,
and is not available for passing back information. The number passed in
<i>ovecsize</i> should always be a multiple of three. If it is not, it is
rounded down.
</P>
<P>
When a match is successful, information about captured substrings is returned
in pairs of integers, starting at the beginning of <i>ovector</i>, and
continuing up to two-thirds of its length at the most. The first element of
each pair is set to the byte offset of the first character in a substring, and
the second is set to the byte offset of the first character after the end of a
substring. <b>Note</b>: these values are always byte offsets, even in UTF-8
mode. They are not character counts.
</P>
<P>
The first pair of integers, <i>ovector[0]</i> and <i>ovector[1]</i>, identify the
portion of the subject string matched by the entire pattern. The next pair is
used for the first capturing subpattern, and so on. The value returned by
<b>pcre_exec()</b> is one more than the highest numbered pair that has been set.
For example, if two substrings have been captured, the returned value is 3. If
there are no capturing subpatterns, the return value from a successful match is
1, indicating that just the first pair of offsets has been set.
</P>
<P>
If a capturing subpattern is matched repeatedly, it is the last portion of the
string that it matched that is returned.
</P>
<P>
If the vector is too small to hold all the captured substring offsets, it is
used as far as possible (up to two-thirds of its length), and the function
returns a value of zero. If the substring offsets are not of interest,
<b>pcre_exec()</b> may be called with <i>ovector</i> passed as NULL and
<i>ovecsize</i> as zero. However, if the pattern contains back references and
the <i>ovector</i> is not big enough to remember the related substrings, PCRE
has to get additional memory for use during matching. Thus it is usually
advisable to supply an <i>ovector</i>.
</P>
<P>
The <b>pcre_fullinfo()</b> function can be used to find out how many capturing
subpatterns there are in a compiled pattern. The smallest size for
<i>ovector</i> that will allow for <i>n</i> captured substrings, in addition to
the offsets of the substring matched by the whole pattern, is (<i>n</i>+1)*3.
</P>
<P>
It is possible for capturing subpattern number <i>n+1</i> to match some part of
the subject when subpattern <i>n</i> has not been used at all. For example, if
the string "abc" is matched against the pattern (a|(z))(bc) the return from the
function is 4, and subpatterns 1 and 3 are matched, but 2 is not. When this
happens, both values in the offset pairs corresponding to unused subpatterns
are set to -1.
</P>
<P>
Offset values that correspond to unused subpatterns at the end of the
expression are also set to -1. For example, if the string "abc" is matched
against the pattern (abc)(x(yz)?)? subpatterns 2 and 3 are not matched. The
return from the function is 2, because the highest used capturing subpattern
number is 1. However, you can refer to the offsets for the second and third
capturing subpatterns if you wish (assuming the vector is large enough, of
course).
</P>
<P>
Some convenience functions are provided for extracting the captured substrings
as separate strings. These are described below.
<a name="errorlist"></a></P>
<br><b>
Error return values from <b>pcre_exec()</b>
</b><br>
<P>
If <b>pcre_exec()</b> fails, it returns a negative number. The following are
defined in the header file:
<pre>
  PCRE_ERROR_NOMATCH        (-1)
</pre>
The subject string did not match the pattern.
<pre>
  PCRE_ERROR_NULL           (-2)
</pre>
Either <i>code</i> or <i>subject</i> was passed as NULL, or <i>ovector</i> was
NULL and <i>ovecsize</i> was not zero.
<pre>
  PCRE_ERROR_BADOPTION      (-3)
</pre>
An unrecognized bit was set in the <i>options</i> argument.
<pre>
  PCRE_ERROR_BADMAGIC       (-4)
</pre>
PCRE stores a 4-byte "magic number" at the start of the compiled code, to catch
the case when it is passed a junk pointer and to detect when a pattern that was
compiled in an environment of one endianness is run in an environment with the
other endianness. This is the error that PCRE gives when the magic number is
not present.
<pre>
  PCRE_ERROR_UNKNOWN_OPCODE (-5)
</pre>
While running the pattern match, an unknown item was encountered in the
compiled pattern. This error could be caused by a bug in PCRE or by overwriting
of the compiled pattern.
<pre>
  PCRE_ERROR_NOMEMORY       (-6)
</pre>
If a pattern contains back references, but the <i>ovector</i> that is passed to
<b>pcre_exec()</b> is not big enough to remember the referenced substrings, PCRE
gets a block of memory at the start of matching to use for this purpose. If the
call via <b>pcre_malloc()</b> fails, this error is given. The memory is
automatically freed at the end of matching.
<pre>
  PCRE_ERROR_NOSUBSTRING    (-7)
</pre>
This error is used by the <b>pcre_copy_substring()</b>,
<b>pcre_get_substring()</b>, and <b>pcre_get_substring_list()</b> functions (see
below). It is never returned by <b>pcre_exec()</b>.
<pre>
  PCRE_ERROR_MATCHLIMIT     (-8)
</pre>
The backtracking limit, as specified by the <i>match_limit</i> field in a
<b>pcre_extra</b> structure (or defaulted) was reached. See the description
above.
<pre>
  PCRE_ERROR_CALLOUT        (-9)
</pre>
This error is never generated by <b>pcre_exec()</b> itself. It is provided for
use by callout functions that want to yield a distinctive error code. See the
<a href="pcrecallout.html"><b>pcrecallout</b></a>
documentation for details.
<pre>
  PCRE_ERROR_BADUTF8        (-10)
</pre>
A string that contains an invalid UTF-8 byte sequence was passed as a subject.
<pre>
  PCRE_ERROR_BADUTF8_OFFSET (-11)
</pre>
The UTF-8 byte sequence that was passed as a subject was valid, but the value
of <i>startoffset</i> did not point to the beginning of a UTF-8 character.
<pre>
  PCRE_ERROR_PARTIAL        (-12)
</pre>
The subject string did not match, but it did match partially. See the
<a href="pcrepartial.html"><b>pcrepartial</b></a>
documentation for details of partial matching.
<pre>
  PCRE_ERROR_BADPARTIAL     (-13)
</pre>
This code is no longer in use. It was formerly returned when the PCRE_PARTIAL
option was used with a compiled pattern containing items that were not
supported for partial matching. From release 8.00 onwards, there are no
restrictions on partial matching.
<pre>
  PCRE_ERROR_INTERNAL       (-14)
</pre>
An unexpected internal error has occurred. This error could be caused by a bug
in PCRE or by overwriting of the compiled pattern.
<pre>
  PCRE_ERROR_BADCOUNT       (-15)
</pre>
This error is given if the value of the <i>ovecsize</i> argument is negative.
<pre>
  PCRE_ERROR_RECURSIONLIMIT (-21)
</pre>
The internal recursion limit, as specified by the <i>match_limit_recursion</i>
field in a <b>pcre_extra</b> structure (or defaulted) was reached. See the
description above.
<pre>
  PCRE_ERROR_BADNEWLINE     (-23)
</pre>
An invalid combination of PCRE_NEWLINE_<i>xxx</i> options was given.
</P>
<P>
Error numbers -16 to -20 and -22 are not used by <b>pcre_exec()</b>.
</P>
<br><a name="SEC15" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NUMBER</a><br>
<P>
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
<b>int <i>buffersize</i>);</b>
</P>
<P>
<b>int pcre_get_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, int <i>stringnumber</i>,</b>
<b>const char **<i>stringptr</i>);</b>
</P>
<P>
<b>int pcre_get_substring_list(const char *<i>subject</i>,</b>
<b>int *<i>ovector</i>, int <i>stringcount</i>, const char ***<i>listptr</i>);</b>
</P>
<P>
Captured substrings can be accessed directly by using the offsets returned by
<b>pcre_exec()</b> in <i>ovector</i>. For convenience, the functions
<b>pcre_copy_substring()</b>, <b>pcre_get_substring()</b>, and
<b>pcre_get_substring_list()</b> are provided for extracting captured substrings
as new, separate, zero-terminated strings. These functions identify substrings
by number. The next section describes functions for extracting named
substrings.
</P>
<P>
A substring that contains a binary zero is correctly extracted and has a
further zero added on the end, but the result is not, of course, a C string.
However, you can process such a string by referring to the length that is
returned by <b>pcre_copy_substring()</b> and <b>pcre_get_substring()</b>.
Unfortunately, the interface to <b>pcre_get_substring_list()</b> is not adequate
for handling strings containing binary zeros, because the end of the final
string is not independently indicated.
</P>
<P>
The first three arguments are the same for all three of these functions:
<i>subject</i> is the subject string that has just been successfully matched,
<i>ovector</i> is a pointer to the vector of integer offsets that was passed to
<b>pcre_exec()</b>, and <i>stringcount</i> is the number of substrings that were
captured by the match, including the substring that matched the entire regular
expression. This is the value returned by <b>pcre_exec()</b> if it is greater
than zero. If <b>pcre_exec()</b> returned zero, indicating that it ran out of
space in <i>ovector</i>, the value passed as <i>stringcount</i> should be the
number of elements in the vector divided by three.
</P>
<P>
The functions <b>pcre_copy_substring()</b> and <b>pcre_get_substring()</b>
extract a single substring, whose number is given as <i>stringnumber</i>. A
value of zero extracts the substring that matched the entire pattern, whereas
higher values extract the captured substrings. For <b>pcre_copy_substring()</b>,
the string is placed in <i>buffer</i>, whose length is given by
<i>buffersize</i>, while for <b>pcre_get_substring()</b> a new block of memory is
obtained via <b>pcre_malloc</b>, and its address is returned via
<i>stringptr</i>. The yield of the function is the length of the string, not
including the terminating zero, or one of these error codes:
<pre>
  PCRE_ERROR_NOMEMORY       (-6)
</pre>
The buffer was too small for <b>pcre_copy_substring()</b>, or the attempt to get
memory failed for <b>pcre_get_substring()</b>.
<pre>
  PCRE_ERROR_NOSUBSTRING    (-7)
</pre>
There is no substring whose number is <i>stringnumber</i>.
</P>
<P>
The <b>pcre_get_substring_list()</b> function extracts all available substrings
and builds a list of pointers to them. All this is done in a single block of
memory that is obtained via <b>pcre_malloc</b>. The address of the memory block
is returned via <i>listptr</i>, which is also the start of the list of string
pointers. The end of the list is marked by a NULL pointer. The yield of the
function is zero if all went well, or the error code
<pre>
  PCRE_ERROR_NOMEMORY       (-6)
</pre>
if the attempt to get the memory block failed.
</P>
<P>
When any of these functions encounter a substring that is unset, which can
happen when capturing subpattern number <i>n+1</i> matches some part of the
subject, but subpattern <i>n</i> has not been used at all, they return an empty
string. This can be distinguished from a genuine zero-length substring by
inspecting the appropriate offset in <i>ovector</i>, which is negative for unset
substrings.
</P>
<P>
The two convenience functions <b>pcre_free_substring()</b> and
<b>pcre_free_substring_list()</b> can be used to free the memory returned by
a previous call of <b>pcre_get_substring()</b> or
<b>pcre_get_substring_list()</b>, respectively. They do nothing more than call
the function pointed to by <b>pcre_free</b>, which of course could be called
directly from a C program. However, PCRE is used in some situations where it is
linked via a special interface to another programming language that cannot use
<b>pcre_free</b> directly; it is for these cases that the functions are
provided.
</P>
<br><a name="SEC16" href="#TOC1">EXTRACTING CAPTURED SUBSTRINGS BY NAME</a><br>
<P>
<b>int pcre_get_stringnumber(const pcre *<i>code</i>,</b>
<b>const char *<i>name</i>);</b>
</P>
<P>
<b>int pcre_copy_named_substring(const pcre *<i>code</i>,</b>
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
<b>char *<i>buffer</i>, int <i>buffersize</i>);</b>
</P>
<P>
<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
<b>const char **<i>stringptr</i>);</b>
</P>
<P>
To extract a substring by name, you first have to find the associated number.
For example, for this pattern
<pre>
  (a+)b(?&#60;xxx&#62;\d+)...
</pre>
the number of the subpattern called "xxx" is 2. If the name is known to be
unique (PCRE_DUPNAMES was not set), you can find the number from the name by
calling <b>pcre_get_stringnumber()</b>. The first argument is the compiled
pattern, and the second is the name. The yield of the function is the
subpattern number, or PCRE_ERROR_NOSUBSTRING (-7) if there is no subpattern of
that name.
</P>
<P>
Given the number, you can extract the substring directly, or use one of the
functions described in the previous section. For convenience, there are also
two functions that do the whole job.
</P>
<P>
Most of the arguments of <b>pcre_copy_named_substring()</b> and
<b>pcre_get_named_substring()</b> are the same as those for the similarly named
functions that extract by number. As these are described in the previous
section, they are not re-described here. There are just two differences:
</P>
<P>
First, instead of a substring number, a substring name is given. Second, there
is an extra argument, given at the start, which is a pointer to the compiled
pattern. This is needed in order to gain access to the name-to-number
translation table.
</P>
<P>
These functions call <b>pcre_get_stringnumber()</b>, and if it succeeds, they
then call <b>pcre_copy_substring()</b> or <b>pcre_get_substring()</b>, as
appropriate. <b>NOTE:</b> If PCRE_DUPNAMES is set and there are duplicate names,
the behaviour may not be what you want (see the next section).
</P>
<P>
<b>Warning:</b> If the pattern uses the (?| feature to set up multiple
subpatterns with the same number, as described in the
<a href="pcrepattern.html#dupsubpatternnumber">section on duplicate subpattern numbers</a>
in the
<a href="pcrepattern.html"><b>pcrepattern</b></a>
page, you cannot use names to distinguish the different subpatterns, because
names are not included in the compiled code. The matching process uses only
numbers. For this reason, the use of different names for subpatterns of the
same number causes an error at compile time.
</P>
<br><a name="SEC17" href="#TOC1">DUPLICATE SUBPATTERN NAMES</a><br>
<P>
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
<b>const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
</P>
<P>
When a pattern is compiled with the PCRE_DUPNAMES option, names for subpatterns
are not required to be unique. (Duplicate names are always allowed for
subpatterns with the same number, created by using the (?| feature. Indeed, if
such subpatterns are named, they are required to use the same names.)
</P>
<P>
Normally, patterns with duplicate names are such that in any one match, only
one of the named subpatterns participates. An example is shown in the
<a href="pcrepattern.html"><b>pcrepattern</b></a>
documentation.
</P>
<P>
When duplicates are present, <b>pcre_copy_named_substring()</b> and
<b>pcre_get_named_substring()</b> return the first substring corresponding to
the given name that is set. If none are set, PCRE_ERROR_NOSUBSTRING (-7) is
returned; no data is returned. The <b>pcre_get_stringnumber()</b> function
returns one of the numbers that are associated with the name, but it is not
defined which it is.
</P>
<P>
If you want to get full details of all captured substrings for a given name,
you must use the <b>pcre_get_stringtable_entries()</b> function. The first
argument is the compiled pattern, and the second is the name. The third and
fourth are pointers to variables which are updated by the function. After it
has run, they point to the first and last entries in the name-to-number table
for the given name. The function itself returns the length of each entry, or
PCRE_ERROR_NOSUBSTRING (-7) if there are none. The format of the table is
described above in the section entitled <i>Information about a pattern</i>.
Given all the relevant entries for the name, you can extract each of their
numbers, and hence the captured data, if any.
</P>
<br><a name="SEC18" href="#TOC1">FINDING ALL POSSIBLE MATCHES</a><br>
<P>
The traditional matching function uses a similar algorithm to Perl, which stops
when it finds the first match, starting at a given point in the subject. If you
want to find all possible matches, or the longest possible match, consider
using the alternative matching function (see below) instead. If you cannot use
the alternative function, but still need to find all possible matches, you
can kludge it up by making use of the callout facility, which is described in
the
<a href="pcrecallout.html"><b>pcrecallout</b></a>
documentation.
</P>
<P>
What you have to do is to insert a callout right at the end of the pattern.
When your callout function is called, extract and save the current matched
substring. Then return 1, which forces <b>pcre_exec()</b> to backtrack and try
other alternatives. Ultimately, when it runs out of matches, <b>pcre_exec()</b>
will yield PCRE_ERROR_NOMATCH.
<a name="dfamatch"></a></P>
<br><a name="SEC19" href="#TOC1">MATCHING A PATTERN: THE ALTERNATIVE FUNCTION</a><br>
<P>
<b>int pcre_dfa_exec(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
<b>const char *<i>subject</i>, int <i>length</i>, int <i>startoffset</i>,</b>
<b>int <i>options</i>, int *<i>ovector</i>, int <i>ovecsize</i>,</b>
<b>int *<i>workspace</i>, int <i>wscount</i>);</b>
</P>
<P>
The function <b>pcre_dfa_exec()</b> is called to match a subject string against
a compiled pattern, using a matching algorithm that scans the subject string
just once, and does not backtrack. This has different characteristics to the
normal algorithm, and is not compatible with Perl. Some of the features of PCRE
patterns are not supported. Nevertheless, there are times when this kind of
matching can be useful. For a discussion of the two matching algorithms, and a
list of features that <b>pcre_dfa_exec()</b> does not support, see the
<a href="pcrematching.html"><b>pcrematching</b></a>
documentation.
</P>
<P>
The arguments for the <b>pcre_dfa_exec()</b> function are the same as for
<b>pcre_exec()</b>, plus two extras. The <i>ovector</i> argument is used in a
different way, and this is described below. The other common arguments are used
in the same way as for <b>pcre_exec()</b>, so their description is not repeated
here.
</P>
<P>
The two additional arguments provide workspace for the function. The workspace
vector should contain at least 20 elements. It is used for keeping track of
multiple paths through the pattern tree. More workspace will be needed for
patterns and subjects where there are a lot of potential matches.
</P>
<P>
Here is an example of a simple call to <b>pcre_dfa_exec()</b>:
<pre>
  int rc;
  int ovector[10];
  int wspace[20];
  rc = pcre_dfa_exec(
    re,             /* result of pcre_compile() */
    NULL,           /* we didn't study the pattern */
    "some string",  /* the subject string */
    11,             /* the length of the subject string */
    0,              /* start at offset 0 in the subject */
    0,              /* default options */
    ovector,        /* vector of integers for substring information */
    10,             /* number of elements (NOT size in bytes) */
    wspace,         /* working space vector */
    20);            /* number of elements (NOT size in bytes) */
</PRE>
</P>
<br><b>
Option bits for <b>pcre_dfa_exec()</b>
</b><br>
<P>
The unused bits of the <i>options</i> argument for <b>pcre_dfa_exec()</b> must be
zero. The only bits that may be set are PCRE_ANCHORED, PCRE_NEWLINE_<i>xxx</i>,
PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART,
PCRE_NO_UTF8_CHECK, PCRE_PARTIAL_HARD, PCRE_PARTIAL_SOFT, PCRE_DFA_SHORTEST,
and PCRE_DFA_RESTART. All but the last four of these are exactly the same as
for <b>pcre_exec()</b>, so their description is not repeated here.
<pre>
  PCRE_PARTIAL_HARD
  PCRE_PARTIAL_SOFT
</pre>
These have the same general effect as they do for <b>pcre_exec()</b>, but the
details are slightly different. When PCRE_PARTIAL_HARD is set for
<b>pcre_dfa_exec()</b>, it returns PCRE_ERROR_PARTIAL if the end of the subject
is reached and there is still at least one matching possibility that requires
additional characters. This happens even if some complete matches have also
been found. When PCRE_PARTIAL_SOFT is set, the return code PCRE_ERROR_NOMATCH
is converted into PCRE_ERROR_PARTIAL if the end of the subject is reached,
there have been no complete matches, but there is still at least one matching
possibility. The portion of the string that was inspected when the longest
partial match was found is set as the first matching string in both cases.
<pre>
  PCRE_DFA_SHORTEST
</pre>
Setting the PCRE_DFA_SHORTEST option causes the matching algorithm to stop as
soon as it has found one match. Because of the way the alternative algorithm
works, this is necessarily the shortest possible match at the first possible
matching point in the subject string.
<pre>
  PCRE_DFA_RESTART
</pre>
When <b>pcre_dfa_exec()</b> returns a partial match, it is possible to call it
again, with additional subject characters, and have it continue with the same
match. The PCRE_DFA_RESTART option requests this action; when it is set, the
<i>workspace</i> and <i>wscount</i> arguments must reference the same vector as
before because data about the match so far is left in them after a partial
match. There is more discussion of this facility in the
<a href="pcrepartial.html"><b>pcrepartial</b></a>
documentation.
</P>
<br><b>
Successful returns from <b>pcre_dfa_exec()</b>
</b><br>
<P>
When <b>pcre_dfa_exec()</b> succeeds, it may have matched more than one
substring in the subject. Note, however, that all the matches from one run of
the function start at the same point in the subject. The shorter matches are
all initial substrings of the longer matches. For example, if the pattern
<pre>
  &#60;.*&#62;
</pre>
is matched against the string
<pre>
  This is &#60;something&#62; &#60;something else&#62; &#60;something further&#62; no more
</pre>
the three matched strings are
<pre>
  &#60;something&#62;
  &#60;something&#62; &#60;something else&#62;
  &#60;something&#62; &#60;something else&#62; &#60;something further&#62;
</pre>
On success, the yield of the function is a number greater than zero, which is
the number of matched substrings. The substrings themselves are returned in
<i>ovector</i>. Each string uses two elements; the first is the offset to the
start, and the second is the offset to the end. In fact, all the strings have
the same start offset. (Space could have been saved by giving this only once,
but it was decided to retain some compatibility with the way <b>pcre_exec()</b>
returns data, even though the meaning of the strings is different.)
</P>
<P>
The strings are returned in reverse order of length; that is, the longest
matching string is given first. If there were too many matches to fit into
<i>ovector</i>, the yield of the function is zero, and the vector is filled with
the longest matches.
</P>
<br><b>
Error returns from <b>pcre_dfa_exec()</b>
</b><br>
<P>
The <b>pcre_dfa_exec()</b> function returns a negative number when it fails.
Many of the errors are the same as for <b>pcre_exec()</b>, and these are
described
<a href="#errorlist">above.</a>
There are in addition the following errors that are specific to
<b>pcre_dfa_exec()</b>:
<pre>
  PCRE_ERROR_DFA_UITEM      (-16)
</pre>
This return is given if <b>pcre_dfa_exec()</b> encounters an item in the pattern
that it does not support, for instance, the use of \C or a back reference.
<pre>
  PCRE_ERROR_DFA_UCOND      (-17)
</pre>
This return is given if <b>pcre_dfa_exec()</b> encounters a condition item that
uses a back reference for the condition, or a test for recursion in a specific
group. These are not supported.
<pre>
  PCRE_ERROR_DFA_UMLIMIT    (-18)
</pre>
This return is given if <b>pcre_dfa_exec()</b> is called with an <i>extra</i>
block that contains a setting of the <i>match_limit</i> field. This is not
supported (it is meaningless).
<pre>
  PCRE_ERROR_DFA_WSSIZE     (-19)
</pre>
This return is given if <b>pcre_dfa_exec()</b> runs out of space in the
<i>workspace</i> vector.
<pre>
  PCRE_ERROR_DFA_RECURSE    (-20)
</pre>
When a recursive subpattern is processed, the matching function calls itself
recursively, using private vectors for <i>ovector</i> and <i>workspace</i>. This
error is given if the output vector is not large enough. This should be
extremely rare, as a vector of size 1000 is used.
</P>
<br><a name="SEC20" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcrebuild</b>(3), <b>pcrecallout</b>(3), <b>pcrecpp</b>(3),
<b>pcrematching</b>(3), <b>pcrepartial</b>(3), <b>pcreposix</b>(3),
<b>pcreprecompile</b>(3), <b>pcresample</b>(3), <b>pcrestack</b>(3).
</P>
<br><a name="SEC21" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<br><a name="SEC22" href="#TOC1">REVISION</a><br>
<P>
Last updated: 03 October 2009
<br>
Copyright &copy; 1997-2009 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre_copy_substring.html000064400000003167150403561470020213 0ustar00<html>
<head>
<title>pcre_copy_substring specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_copy_substring man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>int pcre_copy_substring(const char *<i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, int <i>stringnumber</i>, char *<i>buffer</i>,</b>
<b>int <i>buffersize</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This is a convenience function for extracting a captured substring into a given
buffer. The arguments are:
<pre>
  <i>subject</i>       Subject that has been successfully matched
  <i>ovector</i>       Offset vector that <b>pcre_exec()</b> used
  <i>stringcount</i>   Value returned by <b>pcre_exec()</b>
  <i>stringnumber</i>  Number of the required substring
  <i>buffer</i>        Buffer to receive the string
  <i>buffersize</i>    Size of buffer
</pre>
The yield is the length of the string, PCRE_ERROR_NOMEMORY if the buffer was
too small, or PCRE_ERROR_NOSUBSTRING if the string number is invalid.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcrecompat.html000064400000016575150403561470016274 0ustar00<html>
<head>
<title>pcrecompat specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcrecompat man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
DIFFERENCES BETWEEN PCRE AND PERL
</b><br>
<P>
This document describes the differences in the ways that PCRE and Perl handle
regular expressions. The differences described here are with respect to Perl
5.10.
</P>
<P>
1. PCRE has only a subset of Perl's UTF-8 and Unicode support. Details of what
it does have are given in the
<a href="pcre.html#utf8support">section on UTF-8 support</a>
in the main
<a href="pcre.html"><b>pcre</b></a>
page.
</P>
<P>
2. PCRE does not allow repeat quantifiers on lookahead assertions. Perl permits
them, but they do not mean what you might think. For example, (?!a){3} does
not assert that the next three characters are not "a". It just asserts that the
next character is not "a" three times.
</P>
<P>
3. Capturing subpatterns that occur inside negative lookahead assertions are
counted, but their entries in the offsets vector are never set. Perl sets its
numerical variables from any such patterns that are matched before the
assertion fails to match something (thereby succeeding), but only if the
negative lookahead assertion contains just one branch.
</P>
<P>
4. Though binary zero characters are supported in the subject string, they are
not allowed in a pattern string because it is passed as a normal C string,
terminated by zero. The escape sequence \0 can be used in the pattern to
represent a binary zero.
</P>
<P>
5. The following Perl escape sequences are not supported: \l, \u, \L,
\U, and \N. In fact these are implemented by Perl's general string-handling
and are not part of its pattern matching engine. If any of these are
encountered by PCRE, an error is generated.
</P>
<P>
6. The Perl escape sequences \p, \P, and \X are supported only if PCRE is
built with Unicode character property support. The properties that can be
tested with \p and \P are limited to the general category properties such as
Lu and Nd, script names such as Greek or Han, and the derived properties Any
and L&amp;. PCRE does support the Cs (surrogate) property, which Perl does not; the
Perl documentation says "Because Perl hides the need for the user to understand
the internal representation of Unicode characters, there is no need to
implement the somewhat messy concept of surrogates."
</P>
<P>
7. PCRE does support the \Q...\E escape for quoting substrings. Characters in
between are treated as literals. This is slightly different from Perl in that $
and @ are also handled as literals inside the quotes. In Perl, they cause
variable interpolation (but of course PCRE does not have variables). Note the
following examples:
<pre>
    Pattern            PCRE matches      Perl matches

    \Qabc$xyz\E        abc$xyz           abc followed by the contents of $xyz
    \Qabc\$xyz\E       abc\$xyz          abc\$xyz
    \Qabc\E\$\Qxyz\E   abc$xyz           abc$xyz
</pre>
The \Q...\E sequence is recognized both inside and outside character classes.
</P>
<P>
8. Fairly obviously, PCRE does not support the (?{code}) and (??{code})
constructions. However, there is support for recursive patterns. This is not
available in Perl 5.8, but it is in Perl 5.10. Also, the PCRE "callout"
feature allows an external function to be called during pattern matching. See
the
<a href="pcrecallout.html"><b>pcrecallout</b></a>
documentation for details.
</P>
<P>
9. Subpatterns that are called recursively or as "subroutines" are always
treated as atomic groups in PCRE. This is like Python, but unlike Perl. There
is a discussion of an example that explains this in more detail in the
<a href="pcrepattern.html#recursiondifference">section on recursion differences from Perl</a>
in the
<a href="pcrepattern.html"><b>pcrepattern</b></a>
page.
</P>
<P>
10. There are some differences that are concerned with the settings of captured
strings when part of a pattern is repeated. For example, matching "aba" against
the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE it is set to "b".
</P>
<P>
11. PCRE does support Perl 5.10's backtracking verbs (*ACCEPT), (*FAIL), (*F),
(*COMMIT), (*PRUNE), (*SKIP), and (*THEN), but only in the forms without an
argument. PCRE does not support (*MARK).
</P>
<P>
12. PCRE's handling of duplicate subpattern numbers and duplicate subpattern
names is not as general as Perl's. This is a consequence of the fact that PCRE
works internally just with numbers, using an external table to translate
between numbers and names. In particular, a pattern such as (?|(?&#60;a&#62;A)|(?&#60;b&#62;B),
where the two capturing parentheses have the same number but different names,
is not supported, and causes an error at compile time. If it were allowed, it
would not be possible to distinguish which parentheses matched, because both
names map to capturing subpattern number 1. To avoid this confusing situation,
an error is given at compile time.
</P>
<P>
13. PCRE provides some extensions to the Perl regular expression facilities.
Perl 5.10 includes new features that are not in earlier versions of Perl, some
of which (such as named parentheses) have been in PCRE for some time. This list
is with respect to Perl 5.10:
<br>
<br>
(a) Although lookbehind assertions in PCRE must match fixed length strings,
each alternative branch of a lookbehind assertion can match a different length
of string. Perl requires them all to have the same length.
<br>
<br>
(b) If PCRE_DOLLAR_ENDONLY is set and PCRE_MULTILINE is not set, the $
meta-character matches only at the very end of the string.
<br>
<br>
(c) If PCRE_EXTRA is set, a backslash followed by a letter with no special
meaning is faulted. Otherwise, like Perl, the backslash is quietly ignored.
(Perl can be made to issue a warning.)
<br>
<br>
(d) If PCRE_UNGREEDY is set, the greediness of the repetition quantifiers is
inverted, that is, by default they are not greedy, but if followed by a
question mark they are.
<br>
<br>
(e) PCRE_ANCHORED can be used at matching time to force a pattern to be tried
only at the first matching position in the subject string.
<br>
<br>
(f) The PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART, and
PCRE_NO_AUTO_CAPTURE options for <b>pcre_exec()</b> have no Perl equivalents.
<br>
<br>
(g) The \R escape sequence can be restricted to match only CR, LF, or CRLF
by the PCRE_BSR_ANYCRLF option.
<br>
<br>
(h) The callout facility is PCRE-specific.
<br>
<br>
(i) The partial matching facility is PCRE-specific.
<br>
<br>
(j) Patterns compiled by PCRE can be saved and re-used at a later time, even on
different hosts that have the other endianness.
<br>
<br>
(k) The alternative matching function (<b>pcre_dfa_exec()</b>) matches in a
different way and is not Perl-compatible.
<br>
<br>
(l) PCRE recognizes some special sequences such as (*CR) at the start of
a pattern that set overall options that cannot be changed within the pattern.
</P>
<br><b>
AUTHOR
</b><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<br><b>
REVISION
</b><br>
<P>
Last updated: 04 October 2009
<br>
Copyright &copy; 1997-2009 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcredemo.html000064400000031723150403561500015717 0ustar00<html>
<head>
<title>pcredemo specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcredemo man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
</ul>
<PRE>
/*************************************************
*           PCRE DEMONSTRATION PROGRAM           *
*************************************************/

/* This is a demonstration program to illustrate the most straightforward ways
of calling the PCRE regular expression library from a C program. See the
pcresample documentation for a short discussion ("man pcresample" if you have
the PCRE man pages installed).

In Unix-like environments, if PCRE is installed in your standard system
libraries, you should be able to compile this program using this command:

gcc -Wall pcredemo.c -lpcre -o pcredemo

If PCRE is not installed in a standard place, it is likely to be installed with
support for the pkg-config mechanism. If you have pkg-config, you can compile
this program using this command:

gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo

If you do not have pkg-config, you may have to use this:

gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
  -R/usr/local/lib -lpcre -o pcredemo

Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
library files for PCRE are installed on your system. Only some operating
systems (e.g. Solaris) use the -R option.

Building under Windows:

If you want to statically link this program against a non-dll .a file, you must
define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
pcre_free() exported functions will be declared __declspec(dllimport), with
unwanted results. So in this environment, uncomment the following line. */

/* #define PCRE_STATIC */

#include &lt;stdio.h&gt;
#include &lt;string.h&gt;
#include &lt;pcre.h&gt;

#define OVECCOUNT 30    /* should be a multiple of 3 */


/* Entry point of the demonstration.

Arguments (taken from the command line):
  argv[1..]  zero or more "-g" options, followed by exactly two arguments:
             the regular expression pattern and the subject string.

Returns 0 when the pattern compiled and at least the first match succeeded;
returns 1 for a usage error, a compilation failure, no match on the first
attempt, or any other matching error. */

int main(int argc, char **argv)
{
pcre *re;
const char *error;
char *pattern;
char *subject;
unsigned char *name_table;
int erroffset;
int find_all;
int namecount;
int name_entry_size;
int ovector[OVECCOUNT];
int subject_length;
int rc, i;


/**************************************************************************
* First, sort out the command line. There is only one possible option at  *
* the moment, "-g" to request repeated matching to find all occurrences,  *
* like Perl's /g option. We set the variable find_all to a non-zero value *
* if the -g option is present. Apart from that, there must be exactly two *
* arguments.                                                              *
**************************************************************************/

find_all = 0;
for (i = 1; i &lt; argc; i++)
  {
  if (strcmp(argv[i], "-g") == 0) find_all = 1;
    else break;
  }

/* After the options, we require exactly two arguments, which are the pattern,
and the subject string. On leaving the loop above, i indexes the first
argument that is not "-g". */

if (argc - i != 2)
  {
  printf("Two arguments required: a regex and a subject string\n");
  return 1;
  }

pattern = argv[i];
subject = argv[i+1];
subject_length = (int)strlen(subject);


/*************************************************************************
* Now we are going to compile the regular expression pattern, and handle *
* any errors that are detected.                                          *
*************************************************************************/

re = pcre_compile(
  pattern,              /* the pattern */
  0,                    /* default options */
  &amp;error,               /* for error message */
  &amp;erroffset,           /* for error offset */
  NULL);                /* use default character tables */

/* Compilation failed: print the error message and exit */

if (re == NULL)
  {
  printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
  return 1;
  }


/*************************************************************************
* If the compilation succeeded, we call PCRE again, in order to do a     *
* pattern match against the subject string. This does just ONE match. If *
* further matching is needed, it will be done below.                     *
*************************************************************************/

rc = pcre_exec(
  re,                   /* the compiled pattern */
  NULL,                 /* no extra data - we didn't study the pattern */
  subject,              /* the subject string */
  subject_length,       /* the length of the subject */
  0,                    /* start at offset 0 in the subject */
  0,                    /* default options */
  ovector,              /* output vector for substring information */
  OVECCOUNT);           /* number of elements in the output vector */

/* Matching failed: handle error cases. Note that a plain "no match" on this
first attempt is also reported through the failure exit code. */

if (rc &lt; 0)
  {
  switch(rc)
    {
    case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
    /*
    Handle other special cases if you like
    */
    default: printf("Matching error %d\n", rc); break;
    }
  pcre_free(re);     /* Release memory used for the compiled pattern */
  return 1;
  }

/* Match succeeded */

printf("\nMatch succeeded at offset %d\n", ovector[0]);


/*************************************************************************
* We have found the first match within the subject string. If the output *
* vector wasn't big enough, say so. Then output any substrings that were *
* captured.                                                              *
*************************************************************************/

/* The output vector wasn't big enough. Reset rc to the number of substring
slots the vector does hold, so that the loop below prints exactly those. */

if (rc == 0)
  {
  rc = OVECCOUNT/3;
  printf("ovector only has room for %d captured substrings\n", rc - 1);
  }

/* Show substrings stored in the output vector by number. Obviously, in a real
application you might want to do things other than print them. Substring i is
described by the pair ovector[2*i] (start offset) and ovector[2*i+1] (end
offset), both relative to the start of the subject. */

for (i = 0; i &lt; rc; i++)
  {
  char *substring_start = subject + ovector[2*i];
  int substring_length = ovector[2*i+1] - ovector[2*i];
  printf("%2d: %.*s\n", i, substring_length, substring_start);
  }


/**************************************************************************
* That concludes the basic part of this demonstration program. We have    *
* compiled a pattern, and performed a single match. The code that follows *
* shows first how to access named substrings, and then how to code for    *
* repeated matches on the same subject.                                   *
**************************************************************************/

/* See if there are any named substrings, and if so, show them by name. First
we have to extract the count of named parentheses from the pattern. The return
value of pcre_fullinfo() is deliberately discarded here and in the two calls
below. NOTE(review): namecount has no initializer, so it would be indeterminate
if this call failed - confirm that cannot happen for a pattern that has just
been compiled successfully. */

(void)pcre_fullinfo(
  re,                   /* the compiled pattern */
  NULL,                 /* no extra data - we didn't study the pattern */
  PCRE_INFO_NAMECOUNT,  /* number of named substrings */
  &amp;namecount);          /* where to put the answer */

if (namecount &lt;= 0) printf("No named substrings\n"); else
  {
  unsigned char *tabptr;
  printf("Named substrings\n");

  /* Before we can access the substrings, we must extract the table for
  translating names to numbers, and the size of each entry in the table. Both
  values are kept in variables declared at function scope because the -g loop
  further down uses them again. */

  (void)pcre_fullinfo(
    re,                       /* the compiled pattern */
    NULL,                     /* no extra data - we didn't study the pattern */
    PCRE_INFO_NAMETABLE,      /* address of the table */
    &amp;name_table);             /* where to put the answer */

  (void)pcre_fullinfo(
    re,                       /* the compiled pattern */
    NULL,                     /* no extra data - we didn't study the pattern */
    PCRE_INFO_NAMEENTRYSIZE,  /* size of each entry in the table */
    &amp;name_entry_size);        /* where to put the answer */

  /* Now we can scan the table and, for each entry, print the number, the name,
  and the substring itself. The first two bytes of each entry hold the group
  number, most significant byte first; the name text starts at the third byte.
  The name is printed in a field of name_entry_size - 3 characters (presumably
  the entry size less the two number bytes and a terminating zero - confirm
  against the API documentation).
  NOTE(review): n is not range-checked before it is used to index ovector,
  which has OVECCOUNT elements; confirm that a group number of 15 or more
  cannot reach this point. */

  tabptr = name_table;
  for (i = 0; i &lt; namecount; i++)
    {
    int n = (tabptr[0] &lt;&lt; 8) | tabptr[1];
    printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
      ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
    tabptr += name_entry_size;
    }
  }


/*************************************************************************
* If the "-g" option was given on the command line, we want to continue  *
* to search for additional matches in the subject string, in a similar   *
* way to the /g option in Perl. This turns out to be trickier than you   *
* might think because of the possibility of matching an empty string.    *
* What happens is as follows:                                            *
*                                                                        *
* If the previous match was NOT for an empty string, we can just start   *
* the next match at the end of the previous one.                         *
*                                                                        *
* If the previous match WAS for an empty string, we can't do that, as it *
* would lead to an infinite loop. Instead, a special call of pcre_exec() *
* is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set.    *
* The first of these tells PCRE that an empty string at the start of the *
* subject is not a valid match; other possibilities must be tried. The   *
* second flag restricts PCRE to one match attempt at the initial string  *
* position. If this match succeeds, an alternative to the empty string   *
* match has been found, and we can proceed round the loop.               *
*************************************************************************/

if (!find_all)
  {
  pcre_free(re);   /* Release the memory used for the compiled pattern */
  return 0;        /* Finish unless -g was given */
  }

/* Loop for second and subsequent matches. On entry to each iteration, ovector
still describes the previous match (or has been adjusted by the no-match
branch below). */

for (;;)
  {
  int options = 0;                 /* Normally no options */
  int start_offset = ovector[1];   /* Start at end of previous match */

  /* If the previous match was for an empty string, we are finished if we are
  at the end of the subject. Otherwise, arrange to run another match at the
  same point to see if a non-empty match can be found. */

  if (ovector[0] == ovector[1])
    {
    if (ovector[0] == subject_length) break;
    options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
    }

  /* Run the next matching operation */

  rc = pcre_exec(
    re,                   /* the compiled pattern */
    NULL,                 /* no extra data - we didn't study the pattern */
    subject,              /* the subject string */
    subject_length,       /* the length of the subject */
    start_offset,         /* starting offset in the subject */
    options,              /* options */
    ovector,              /* output vector for substring information */
    OVECCOUNT);           /* number of elements in the output vector */

  /* This time, a result of NOMATCH isn't an error. If the value in "options"
  is zero, it just means we have found all possible matches, so the loop ends.
  Otherwise, it means we have failed to find a non-empty-string match at a
  point where there was a previous empty-string match. In this case, we do what
  Perl does: advance the matching position by one, and continue. We do this by
  setting the "end of previous match" offset, because that is picked up at the
  top of the loop as the point at which to start again. */

  if (rc == PCRE_ERROR_NOMATCH)
    {
    if (options == 0) break;
    ovector[1] = start_offset + 1;
    continue;    /* Go round the loop again */
    }

  /* Other matching errors are not recoverable. */

  if (rc &lt; 0)
    {
    printf("Matching error %d\n", rc);
    pcre_free(re);    /* Release memory used for the compiled pattern */
    return 1;
    }

  /* Match succeeded */

  printf("\nMatch succeeded again at offset %d\n", ovector[0]);

  /* The match succeeded, but the output vector wasn't big enough. */

  if (rc == 0)
    {
    rc = OVECCOUNT/3;
    printf("ovector only has room for %d captured substrings\n", rc - 1);
    }

  /* As before, show substrings stored in the output vector by number, and then
  also any named substrings. */

  for (i = 0; i &lt; rc; i++)
    {
    char *substring_start = subject + ovector[2*i];
    int substring_length = ovector[2*i+1] - ovector[2*i];
    printf("%2d: %.*s\n", i, substring_length, substring_start);
    }

  /* name_table and name_entry_size are only read when namecount is positive,
  which is the same condition under which they were filled in after the first
  match. */

  if (namecount &lt;= 0) printf("No named substrings\n"); else
    {
    unsigned char *tabptr = name_table;
    printf("Named substrings\n");
    for (i = 0; i &lt; namecount; i++)
      {
      int n = (tabptr[0] &lt;&lt; 8) | tabptr[1];
      printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
        ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
      tabptr += name_entry_size;
      }
    }
  }      /* End of loop to find second and subsequent matches */

printf("\n");
pcre_free(re);       /* Release memory used for the compiled pattern */
return 0;
}

/* End of pcredemo.c */
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcresyntax.html000064400000033235150403561500016321 0ustar00<html>
<head>
<title>pcresyntax specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcresyntax man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">PCRE REGULAR EXPRESSION SYNTAX SUMMARY</a>
<li><a name="TOC2" href="#SEC2">QUOTING</a>
<li><a name="TOC3" href="#SEC3">CHARACTERS</a>
<li><a name="TOC4" href="#SEC4">CHARACTER TYPES</a>
<li><a name="TOC5" href="#SEC5">GENERAL CATEGORY PROPERTY CODES FOR \p and \P</a>
<li><a name="TOC6" href="#SEC6">SCRIPT NAMES FOR \p AND \P</a>
<li><a name="TOC7" href="#SEC7">CHARACTER CLASSES</a>
<li><a name="TOC8" href="#SEC8">QUANTIFIERS</a>
<li><a name="TOC9" href="#SEC9">ANCHORS AND SIMPLE ASSERTIONS</a>
<li><a name="TOC10" href="#SEC10">MATCH POINT RESET</a>
<li><a name="TOC11" href="#SEC11">ALTERNATION</a>
<li><a name="TOC12" href="#SEC12">CAPTURING</a>
<li><a name="TOC13" href="#SEC13">ATOMIC GROUPS</a>
<li><a name="TOC14" href="#SEC14">COMMENT</a>
<li><a name="TOC15" href="#SEC15">OPTION SETTING</a>
<li><a name="TOC16" href="#SEC16">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a>
<li><a name="TOC17" href="#SEC17">BACKREFERENCES</a>
<li><a name="TOC18" href="#SEC18">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a>
<li><a name="TOC19" href="#SEC19">CONDITIONAL PATTERNS</a>
<li><a name="TOC20" href="#SEC20">BACKTRACKING CONTROL</a>
<li><a name="TOC21" href="#SEC21">NEWLINE CONVENTIONS</a>
<li><a name="TOC22" href="#SEC22">WHAT \R MATCHES</a>
<li><a name="TOC23" href="#SEC23">CALLOUTS</a>
<li><a name="TOC24" href="#SEC24">SEE ALSO</a>
<li><a name="TOC25" href="#SEC25">AUTHOR</a>
<li><a name="TOC26" href="#SEC26">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">PCRE REGULAR EXPRESSION SYNTAX SUMMARY</a><br>
<P>
The full syntax and semantics of the regular expressions that are supported by
PCRE are described in the
<a href="pcrepattern.html"><b>pcrepattern</b></a>
documentation. This document contains just a quick-reference summary of the
syntax.
</P>
<br><a name="SEC2" href="#TOC1">QUOTING</a><br>
<P>
<pre>
  \x         where x is non-alphanumeric is a literal x
  \Q...\E    treat enclosed characters as literal
</PRE>
</P>
<br><a name="SEC3" href="#TOC1">CHARACTERS</a><br>
<P>
<pre>
  \a         alarm, that is, the BEL character (hex 07)
  \cx        "control-x", where x is any character
  \e         escape (hex 1B)
  \f         formfeed (hex 0C)
  \n         newline (hex 0A)
  \r         carriage return (hex 0D)
  \t         tab (hex 09)
  \ddd       character with octal code ddd, or backreference
  \xhh       character with hex code hh
  \x{hhh..}  character with hex code hhh..
</PRE>
</P>
<br><a name="SEC4" href="#TOC1">CHARACTER TYPES</a><br>
<P>
<pre>
  .          any character except newline;
               in dotall mode, any character whatsoever
  \C         one byte, even in UTF-8 mode (best avoided)
  \d         a decimal digit
  \D         a character that is not a decimal digit
  \h         a horizontal whitespace character
  \H         a character that is not a horizontal whitespace character
  \p{<i>xx</i>}     a character with the <i>xx</i> property
  \P{<i>xx</i>}     a character without the <i>xx</i> property
  \R         a newline sequence
  \s         a whitespace character
  \S         a character that is not a whitespace character
  \v         a vertical whitespace character
  \V         a character that is not a vertical whitespace character
  \w         a "word" character
  \W         a "non-word" character
  \X         an extended Unicode sequence
</pre>
In PCRE, \d, \D, \s, \S, \w, and \W recognize only ASCII characters.
</P>
<br><a name="SEC5" href="#TOC1">GENERAL CATEGORY PROPERTY CODES FOR \p and \P</a><br>
<P>
<pre>
  C          Other
  Cc         Control
  Cf         Format
  Cn         Unassigned
  Co         Private use
  Cs         Surrogate

  L          Letter
  Ll         Lower case letter
  Lm         Modifier letter
  Lo         Other letter
  Lt         Title case letter
  Lu         Upper case letter
  L&         Ll, Lu, or Lt

  M          Mark
  Mc         Spacing mark
  Me         Enclosing mark
  Mn         Non-spacing mark

  N          Number
  Nd         Decimal number
  Nl         Letter number
  No         Other number

  P          Punctuation
  Pc         Connector punctuation
  Pd         Dash punctuation
  Pe         Close punctuation
  Pf         Final punctuation
  Pi         Initial punctuation
  Po         Other punctuation
  Ps         Open punctuation

  S          Symbol
  Sc         Currency symbol
  Sk         Modifier symbol
  Sm         Mathematical symbol
  So         Other symbol

  Z          Separator
  Zl         Line separator
  Zp         Paragraph separator
  Zs         Space separator
</PRE>
</P>
<br><a name="SEC6" href="#TOC1">SCRIPT NAMES FOR \p AND \P</a><br>
<P>
Arabic,
Armenian,
Avestan,
Balinese,
Bamum,
Bengali,
Bopomofo,
Braille,
Buginese,
Buhid,
Canadian_Aboriginal,
Carian,
Cham,
Cherokee,
Common,
Coptic,
Cuneiform,
Cypriot,
Cyrillic,
Deseret,
Devanagari,
Egyptian_Hieroglyphs,
Ethiopic,
Georgian,
Glagolitic,
Gothic,
Greek,
Gujarati,
Gurmukhi,
Han,
Hangul,
Hanunoo,
Hebrew,
Hiragana,
Imperial_Aramaic,
Inherited,
Inscriptional_Pahlavi,
Inscriptional_Parthian,
Javanese,
Kaithi,
Kannada,
Katakana,
Kayah_Li,
Kharoshthi,
Khmer,
Lao,
Latin,
Lepcha,
Limbu,
Linear_B,
Lisu,
Lycian,
Lydian,
Malayalam,
Meetei_Mayek,
Mongolian,
Myanmar,
New_Tai_Lue,
Nko,
Ogham,
Old_Italic,
Old_Persian,
Old_South_Arabian,
Old_Turkic,
Ol_Chiki,
Oriya,
Osmanya,
Phags_Pa,
Phoenician,
Rejang,
Runic,
Samaritan,
Saurashtra,
Shavian,
Sinhala,
Sundanese,
Syloti_Nagri,
Syriac,
Tagalog,
Tagbanwa,
Tai_Le,
Tai_Tham,
Tai_Viet,
Tamil,
Telugu,
Thaana,
Thai,
Tibetan,
Tifinagh,
Ugaritic,
Vai,
Yi.
</P>
<br><a name="SEC7" href="#TOC1">CHARACTER CLASSES</a><br>
<P>
<pre>
  [...]       positive character class
  [^...]      negative character class
  [x-y]       range (can be used for hex characters)
  [[:xxx:]]   positive POSIX named set
  [[:^xxx:]]  negative POSIX named set

  alnum       alphanumeric
  alpha       alphabetic
  ascii       0-127
  blank       space or tab
  cntrl       control character
  digit       decimal digit
  graph       printing, excluding space
  lower       lower case letter
  print       printing, including space
  punct       printing, excluding alphanumeric
  space       whitespace
  upper       upper case letter
  word        same as \w
  xdigit      hexadecimal digit
</pre>
In PCRE, POSIX character set names recognize only ASCII characters. You can use
\Q...\E inside a character class.
</P>
<br><a name="SEC8" href="#TOC1">QUANTIFIERS</a><br>
<P>
<pre>
  ?           0 or 1, greedy
  ?+          0 or 1, possessive
  ??          0 or 1, lazy
  *           0 or more, greedy
  *+          0 or more, possessive
  *?          0 or more, lazy
  +           1 or more, greedy
  ++          1 or more, possessive
  +?          1 or more, lazy
  {n}         exactly n
  {n,m}       at least n, no more than m, greedy
  {n,m}+      at least n, no more than m, possessive
  {n,m}?      at least n, no more than m, lazy
  {n,}        n or more, greedy
  {n,}+       n or more, possessive
  {n,}?       n or more, lazy
</PRE>
</P>
<br><a name="SEC9" href="#TOC1">ANCHORS AND SIMPLE ASSERTIONS</a><br>
<P>
<pre>
  \b          word boundary (only ASCII letters recognized)
  \B          not a word boundary
  ^           start of subject
               also after internal newline in multiline mode
  \A          start of subject
  $           end of subject
               also before newline at end of subject
               also before internal newline in multiline mode
  \Z          end of subject
               also before newline at end of subject
  \z          end of subject
  \G          first matching position in subject
</PRE>
</P>
<br><a name="SEC10" href="#TOC1">MATCH POINT RESET</a><br>
<P>
<pre>
  \K          reset start of match
</PRE>
</P>
<br><a name="SEC11" href="#TOC1">ALTERNATION</a><br>
<P>
<pre>
  expr|expr|expr...
</PRE>
</P>
<br><a name="SEC12" href="#TOC1">CAPTURING</a><br>
<P>
<pre>
  (...)           capturing group
  (?&#60;name&#62;...)    named capturing group (Perl)
  (?'name'...)    named capturing group (Perl)
  (?P&#60;name&#62;...)   named capturing group (Python)
  (?:...)         non-capturing group
  (?|...)         non-capturing group; reset group numbers for
                   capturing groups in each alternative
</PRE>
</P>
<br><a name="SEC13" href="#TOC1">ATOMIC GROUPS</a><br>
<P>
<pre>
  (?&#62;...)         atomic, non-capturing group
</PRE>
</P>
<br><a name="SEC14" href="#TOC1">COMMENT</a><br>
<P>
<pre>
  (?#....)        comment (not nestable)
</PRE>
</P>
<br><a name="SEC15" href="#TOC1">OPTION SETTING</a><br>
<P>
<pre>
  (?i)            caseless
  (?J)            allow duplicate names
  (?m)            multiline
  (?s)            single line (dotall)
  (?U)            default ungreedy (lazy)
  (?x)            extended (ignore white space)
  (?-...)         unset option(s)
</pre>
The following is recognized only at the start of a pattern or after one of the
newline-setting options with similar syntax:
<pre>
  (*UTF8)         set UTF-8 mode
</PRE>
</P>
<br><a name="SEC16" href="#TOC1">LOOKAHEAD AND LOOKBEHIND ASSERTIONS</a><br>
<P>
<pre>
  (?=...)         positive look ahead
  (?!...)         negative look ahead
  (?&#60;=...)        positive look behind
  (?&#60;!...)        negative look behind
</pre>
Each top-level branch of a look behind must be of a fixed length.
</P>
<br><a name="SEC17" href="#TOC1">BACKREFERENCES</a><br>
<P>
<pre>
  \n              reference by number (can be ambiguous)
  \gn             reference by number
  \g{n}           reference by number
  \g{-n}          relative reference by number
  \k&#60;name&#62;        reference by name (Perl)
  \k'name'        reference by name (Perl)
  \g{name}        reference by name (Perl)
  \k{name}        reference by name (.NET)
  (?P=name)       reference by name (Python)
</PRE>
</P>
<br><a name="SEC18" href="#TOC1">SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)</a><br>
<P>
<pre>
  (?R)            recurse whole pattern
  (?n)            call subpattern by absolute number
  (?+n)           call subpattern by relative number
  (?-n)           call subpattern by relative number
  (?&amp;name)        call subpattern by name (Perl)
  (?P&#62;name)       call subpattern by name (Python)
  \g&#60;name&#62;        call subpattern by name (Oniguruma)
  \g'name'        call subpattern by name (Oniguruma)
  \g&#60;n&#62;           call subpattern by absolute number (Oniguruma)
  \g'n'           call subpattern by absolute number (Oniguruma)
  \g&#60;+n&#62;          call subpattern by relative number (PCRE extension)
  \g'+n'          call subpattern by relative number (PCRE extension)
  \g&#60;-n&#62;          call subpattern by relative number (PCRE extension)
  \g'-n'          call subpattern by relative number (PCRE extension)
</PRE>
</P>
<br><a name="SEC19" href="#TOC1">CONDITIONAL PATTERNS</a><br>
<P>
<pre>
  (?(condition)yes-pattern)
  (?(condition)yes-pattern|no-pattern)

  (?(n)...        absolute reference condition
  (?(+n)...       relative reference condition
  (?(-n)...       relative reference condition
  (?(&#60;name&#62;)...   named reference condition (Perl)
  (?('name')...   named reference condition (Perl)
  (?(name)...     named reference condition (PCRE)
  (?(R)...        overall recursion condition
  (?(Rn)...       specific group recursion condition
  (?(R&amp;name)...   specific recursion condition
  (?(DEFINE)...   define subpattern for reference
  (?(assert)...   assertion condition
</PRE>
</P>
<br><a name="SEC20" href="#TOC1">BACKTRACKING CONTROL</a><br>
<P>
The following act immediately when they are reached:
<pre>
  (*ACCEPT)       force successful match
  (*FAIL)         force backtrack; synonym (*F)
</pre>
The following act only when a subsequent match failure causes a backtrack to
reach them. They all force a match failure, but they differ in what happens
afterwards. Those that advance the start-of-match point do so only if the
pattern is not anchored.
<pre>
  (*COMMIT)       overall failure, no advance of starting point
  (*PRUNE)        advance to next starting character
  (*SKIP)         advance start to current matching position
  (*THEN)         local failure, backtrack to next alternation
</PRE>
</P>
<br><a name="SEC21" href="#TOC1">NEWLINE CONVENTIONS</a><br>
<P>
These are recognized only at the very start of the pattern or after a
(*BSR_...) or (*UTF8) option.
<pre>
  (*CR)           carriage return only
  (*LF)           linefeed only
  (*CRLF)         carriage return followed by linefeed
  (*ANYCRLF)      all three of the above
  (*ANY)          any Unicode newline sequence
</PRE>
</P>
<br><a name="SEC22" href="#TOC1">WHAT \R MATCHES</a><br>
<P>
These are recognized only at the very start of the pattern or after a
(*...) option that sets the newline convention or UTF-8 mode.
<pre>
  (*BSR_ANYCRLF)  CR, LF, or CRLF
  (*BSR_UNICODE)  any Unicode newline sequence
</PRE>
</P>
<br><a name="SEC23" href="#TOC1">CALLOUTS</a><br>
<P>
<pre>
  (?C)      callout
  (?Cn)     callout with data n
</PRE>
</P>
<br><a name="SEC24" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcrepattern</b>(3), <b>pcreapi</b>(3), <b>pcrecallout</b>(3),
<b>pcrematching</b>(3), <b>pcre</b>(3).
</P>
<br><a name="SEC25" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<br><a name="SEC26" href="#TOC1">REVISION</a><br>
<P>
Last updated: 01 March 2010
<br>
Copyright &copy; 1997-2010 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre_fullinfo.html000064400000005352150403561500016747 0ustar00<html>
<head>
<title>pcre_fullinfo specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_fullinfo man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>int pcre_fullinfo(const pcre *<i>code</i>, const pcre_extra *<i>extra</i>,</b>
<b>int <i>what</i>, void *<i>where</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function returns information about a compiled pattern. Its arguments are:
<pre>
  <i>code</i>                      Compiled regular expression
  <i>extra</i>                     Result of <b>pcre_study()</b> or NULL
  <i>what</i>                      What information is required
  <i>where</i>                     Where to put the information
</pre>
The following information is available:
<pre>
  PCRE_INFO_BACKREFMAX      Number of highest back reference
  PCRE_INFO_CAPTURECOUNT    Number of capturing subpatterns
  PCRE_INFO_DEFAULT_TABLES  Pointer to default tables
  PCRE_INFO_FIRSTBYTE       Fixed first byte for a match, or
                              -1 for start of string
                                 or after newline, or
                              -2 otherwise
  PCRE_INFO_FIRSTTABLE      Table of first bytes (after studying)
  PCRE_INFO_JCHANGED        Return 1 if (?J) or (?-J) was used
  PCRE_INFO_LASTLITERAL     Literal last byte required
  PCRE_INFO_MINLENGTH       Lower bound length of matching strings
  PCRE_INFO_NAMECOUNT       Number of named subpatterns
  PCRE_INFO_NAMEENTRYSIZE   Size of name table entry
  PCRE_INFO_NAMETABLE       Pointer to name table
  PCRE_INFO_OKPARTIAL       Return 1 if partial matching can be tried
                              (always returns 1 after release 8.00)
  PCRE_INFO_OPTIONS         Option bits used for compilation
  PCRE_INFO_SIZE            Size of compiled pattern
  PCRE_INFO_STUDYSIZE       Size of study data
</pre>
The yield of the function is zero on success or:
<pre>
  PCRE_ERROR_NULL           the argument <i>code</i> was NULL
                            the argument <i>where</i> was NULL
  PCRE_ERROR_BADMAGIC       the "magic number" was not found
  PCRE_ERROR_BADOPTION      the value of <i>what</i> was invalid
</PRE>
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre_get_named_substring.html000064400000003603150403561500021151 0ustar00<html>
<head>
<title>pcre_get_named_substring specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_get_named_substring man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>int pcre_get_named_substring(const pcre *<i>code</i>,</b>
<b>const char *<i>subject</i>, int *<i>ovector</i>,</b>
<b>int <i>stringcount</i>, const char *<i>stringname</i>,</b>
<b>const char **<i>stringptr</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This is a convenience function for extracting a captured substring by name. The
arguments are:
<pre>
  <i>code</i>          Compiled pattern
  <i>subject</i>       Subject that has been successfully matched
  <i>ovector</i>       Offset vector that <b>pcre_exec()</b> used
  <i>stringcount</i>   Value returned by <b>pcre_exec()</b>
  <i>stringname</i>    Name of the required substring
  <i>stringptr</i>     Where to put the string pointer
</pre>
The memory in which the substring is placed is obtained by calling
<b>pcre_malloc()</b>. The convenience function <b>pcre_free_substring()</b> can
be used to free it when it is no longer needed. The yield of the function is
the length of the extracted substring, PCRE_ERROR_NOMEMORY if sufficient memory
could not be obtained, or PCRE_ERROR_NOSUBSTRING if the string name is invalid.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcrepartial.html000064400000043321150403561500016424 0ustar00<html>
<head>
<title>pcrepartial specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcrepartial man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">PARTIAL MATCHING IN PCRE</a>
<li><a name="TOC2" href="#SEC2">PARTIAL MATCHING USING pcre_exec()</a>
<li><a name="TOC3" href="#SEC3">PARTIAL MATCHING USING pcre_dfa_exec()</a>
<li><a name="TOC4" href="#SEC4">PARTIAL MATCHING AND WORD BOUNDARIES</a>
<li><a name="TOC5" href="#SEC5">FORMERLY RESTRICTED PATTERNS</a>
<li><a name="TOC6" href="#SEC6">EXAMPLE OF PARTIAL MATCHING USING PCRETEST</a>
<li><a name="TOC7" href="#SEC7">MULTI-SEGMENT MATCHING WITH pcre_dfa_exec()</a>
<li><a name="TOC8" href="#SEC8">MULTI-SEGMENT MATCHING WITH pcre_exec()</a>
<li><a name="TOC9" href="#SEC9">ISSUES WITH MULTI-SEGMENT MATCHING</a>
<li><a name="TOC10" href="#SEC10">AUTHOR</a>
<li><a name="TOC11" href="#SEC11">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">PARTIAL MATCHING IN PCRE</a><br>
<P>
In normal use of PCRE, if the subject string that is passed to
<b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> matches as far as it goes, but is
too short to match the entire pattern, PCRE_ERROR_NOMATCH is returned. There
are circumstances where it might be helpful to distinguish this case from other
cases in which there is no match.
</P>
<P>
Consider, for example, an application where a human is required to type in data
for a field with specific formatting requirements. An example might be a date
in the form <i>ddmmmyy</i>, defined by this pattern:
<pre>
  ^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$
</pre>
If the application sees the user's keystrokes one by one, and can check that
what has been typed so far is potentially valid, it is able to raise an error
as soon as a mistake is made, by beeping and not reflecting the character that
has been typed, for example. This immediate feedback is likely to be a better
user interface than a check that is delayed until the entire string has been
entered. Partial matching can also sometimes be useful when the subject string
is very long and is not all available at once.
</P>
<P>
PCRE supports partial matching by means of the PCRE_PARTIAL_SOFT and
PCRE_PARTIAL_HARD options, which can be set when calling <b>pcre_exec()</b> or
<b>pcre_dfa_exec()</b>. For backwards compatibility, PCRE_PARTIAL is a synonym
for PCRE_PARTIAL_SOFT. The essential difference between the two options is
whether or not a partial match is preferred to an alternative complete match,
though the details differ between the two matching functions. If both options
are set, PCRE_PARTIAL_HARD takes precedence.
</P>
<P>
Setting a partial matching option disables two of PCRE's optimizations. PCRE
remembers the last literal byte in a pattern, and abandons matching immediately
if such a byte is not present in the subject string. This optimization cannot
be used for a subject string that might match only partially. If the pattern
was studied, PCRE knows the minimum length of a matching string, and does not
bother to run the matching function on shorter strings. This optimization is
also disabled for partial matching.
</P>
<br><a name="SEC2" href="#TOC1">PARTIAL MATCHING USING pcre_exec()</a><br>
<P>
A partial match occurs during a call to <b>pcre_exec()</b> whenever the end of
the subject string is reached successfully, but matching cannot continue
because more characters are needed. However, at least one character must have
been matched. (In other words, a partial match can never be an empty string.)
</P>
<P>
If PCRE_PARTIAL_SOFT is set, the partial match is remembered, but matching
continues as normal, and other alternatives in the pattern are tried. If no
complete match can be found, <b>pcre_exec()</b> returns PCRE_ERROR_PARTIAL
instead of PCRE_ERROR_NOMATCH. If there are at least two slots in the offsets
vector, the first of them is set to the offset of the earliest character that
was inspected when the partial match was found. For convenience, the second
offset points to the end of the string so that a substring can easily be
identified.
</P>
<P>
For the majority of patterns, the first offset identifies the start of the
partially matched string. However, for patterns that contain lookbehind
assertions, or \K, or begin with \b or \B, earlier characters have been
inspected while carrying out the match. For example:
<pre>
  /(?&#60;=abc)123/
</pre>
This pattern matches "123", but only if it is preceded by "abc". If the subject
string is "xyzabc12", the offsets after a partial match are for the substring
"abc12", because all these characters are needed if another match is tried
with extra characters added.
</P>
<P>
If there is more than one partial match, the first one that was found provides
the data that is returned. Consider this pattern:
<pre>
  /123\w+X|dogY/
</pre>
If this is matched against the subject string "abc123dog", both
alternatives fail to match, but the end of the subject is reached during
matching, so PCRE_ERROR_PARTIAL is returned instead of PCRE_ERROR_NOMATCH. The
offsets are set to 3 and 9, identifying "123dog" as the first partial match
that was found. (In this example, there are two partial matches, because "dog"
on its own partially matches the second alternative.)
</P>
<P>
If PCRE_PARTIAL_HARD is set for <b>pcre_exec()</b>, it returns
PCRE_ERROR_PARTIAL as soon as a partial match is found, without continuing to
search for possible complete matches. The difference between the two options
can be illustrated by a pattern such as:
<pre>
  /dog(sbody)?/
</pre>
This matches either "dog" or "dogsbody", greedily (that is, it prefers the
longer string if possible). If it is matched against the string "dog" with
PCRE_PARTIAL_SOFT, it yields a complete match for "dog". However, if
PCRE_PARTIAL_HARD is set, the result is PCRE_ERROR_PARTIAL. On the other hand,
if the pattern is made ungreedy the result is different:
<pre>
  /dog(sbody)??/
</pre>
In this case the result is always a complete match because <b>pcre_exec()</b>
finds that first, and it never continues after finding a match. It might be
easier to follow this explanation by thinking of the two patterns like this:
<pre>
  /dog(sbody)?/    is the same as  /dogsbody|dog/
  /dog(sbody)??/   is the same as  /dog|dogsbody/
</pre>
The second pattern will never match "dogsbody" when <b>pcre_exec()</b> is
used, because it will always find the shorter match first.
</P>
<br><a name="SEC3" href="#TOC1">PARTIAL MATCHING USING pcre_dfa_exec()</a><br>
<P>
The <b>pcre_dfa_exec()</b> function moves along the subject string character by
character, without backtracking, searching for all possible matches
simultaneously. If the end of the subject is reached before the end of the
pattern, there is the possibility of a partial match, again provided that at
least one character has matched.
</P>
<P>
When PCRE_PARTIAL_SOFT is set, PCRE_ERROR_PARTIAL is returned only if there
have been no complete matches. Otherwise, the complete matches are returned.
However, if PCRE_PARTIAL_HARD is set, a partial match takes precedence over any
complete matches. The portion of the string that was inspected when the longest
partial match was found is set as the first matching string, provided there are
at least two slots in the offsets vector.
</P>
<P>
Because <b>pcre_dfa_exec()</b> always searches for all possible matches, and
there is no difference between greedy and ungreedy repetition, its behaviour is
different from <b>pcre_exec</b> when PCRE_PARTIAL_HARD is set. Consider the
string "dog" matched against the ungreedy pattern shown above:
<pre>
  /dog(sbody)??/
</pre>
Whereas <b>pcre_exec()</b> stops as soon as it finds the complete match for
"dog", <b>pcre_dfa_exec()</b> also finds the partial match for "dogsbody", and
so returns that when PCRE_PARTIAL_HARD is set.
</P>
<br><a name="SEC4" href="#TOC1">PARTIAL MATCHING AND WORD BOUNDARIES</a><br>
<P>
If a pattern ends with one of the sequences \b or \B, which test for word
boundaries, partial matching with PCRE_PARTIAL_SOFT can give counter-intuitive
results. Consider this pattern:
<pre>
  /\bcat\b/
</pre>
This matches "cat", provided there is a word boundary at either end. If the
subject string is "the cat", the comparison of the final "t" with a following
character cannot take place, so a partial match is found. However,
<b>pcre_exec()</b> carries on with normal matching, which matches \b at the end
of the subject when the last character is a letter, thus finding a complete
match. The result, therefore, is <i>not</i> PCRE_ERROR_PARTIAL. The same thing
happens with <b>pcre_dfa_exec()</b>, because it also finds the complete match.
</P>
<P>
Using PCRE_PARTIAL_HARD in this case does yield PCRE_ERROR_PARTIAL, because
then the partial match takes precedence.
</P>
<br><a name="SEC5" href="#TOC1">FORMERLY RESTRICTED PATTERNS</a><br>
<P>
For releases of PCRE prior to 8.00, because of the way certain internal
optimizations were implemented in the <b>pcre_exec()</b> function, the
PCRE_PARTIAL option (predecessor of PCRE_PARTIAL_SOFT) could not be used with
all patterns. From release 8.00 onwards, the restrictions no longer apply, and
partial matching with <b>pcre_exec()</b> can be requested for any pattern.
</P>
<P>
Items that were formerly restricted were repeated single characters and
repeated metasequences. If PCRE_PARTIAL was set for a pattern that did not
conform to the restrictions, <b>pcre_exec()</b> returned the error code
PCRE_ERROR_BADPARTIAL (-13). This error code is no longer in use. The
PCRE_INFO_OKPARTIAL call to <b>pcre_fullinfo()</b> to find out if a compiled
pattern can be used for partial matching now always returns 1.
</P>
<br><a name="SEC6" href="#TOC1">EXAMPLE OF PARTIAL MATCHING USING PCRETEST</a><br>
<P>
If the escape sequence \P is present in a <b>pcretest</b> data line, the
PCRE_PARTIAL_SOFT option is used for the match. Here is a run of <b>pcretest</b>
that uses the date example quoted above:
<pre>
    re&#62; /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
  data&#62; 25jun04\P
   0: 25jun04
   1: jun
  data&#62; 25dec3\P
  Partial match: 25dec3
  data&#62; 3ju\P
  Partial match: 3ju
  data&#62; 3juj\P
  No match
  data&#62; j\P
  No match
</pre>
The first data string is matched completely, so <b>pcretest</b> shows the
matched substrings. The remaining four strings do not match the complete
pattern, but the first two are partial matches. Similar output is obtained
when <b>pcre_dfa_exec()</b> is used.
</P>
<P>
If the escape sequence \P is present more than once in a <b>pcretest</b> data
line, the PCRE_PARTIAL_HARD option is set for the match.
</P>
<br><a name="SEC7" href="#TOC1">MULTI-SEGMENT MATCHING WITH pcre_dfa_exec()</a><br>
<P>
When a partial match has been found using <b>pcre_dfa_exec()</b>, it is possible
to continue the match by providing additional subject data and calling
<b>pcre_dfa_exec()</b> again with the same compiled regular expression, this
time setting the PCRE_DFA_RESTART option. You must pass the same working
space as before, because this is where details of the previous partial match
are stored. Here is an example using <b>pcretest</b>, using the \R escape
sequence to set the PCRE_DFA_RESTART option (\D specifies the use of
<b>pcre_dfa_exec()</b>):
<pre>
    re&#62; /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
  data&#62; 23ja\P\D
  Partial match: 23ja
  data&#62; n05\R\D
   0: n05
</pre>
The first call has "23ja" as the subject, and requests partial matching; the
second call has "n05" as the subject for the continued (restarted) match.
Notice that when the match is complete, only the last part is shown; PCRE does
not retain the previously partially-matched string. It is up to the calling
program to do that if it needs to.
</P>
<P>
You can set the PCRE_PARTIAL_SOFT or PCRE_PARTIAL_HARD options with
PCRE_DFA_RESTART to continue partial matching over multiple segments. This
facility can be used to pass very long subject strings to
<b>pcre_dfa_exec()</b>.
</P>
<br><a name="SEC8" href="#TOC1">MULTI-SEGMENT MATCHING WITH pcre_exec()</a><br>
<P>
From release 8.00, <b>pcre_exec()</b> can also be used to do multi-segment
matching. Unlike <b>pcre_dfa_exec()</b>, it is not possible to restart the
previous match with a new segment of data. Instead, new data must be added to
the previous subject string, and the entire match re-run, starting from the
point where the partial match occurred. Earlier data can be discarded.
Consider an unanchored pattern that matches dates:
<pre>
    re&#62; /\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d/
  data&#62; The date is 23ja\P
  Partial match: 23ja
</pre>
At this stage, an application could discard the text preceding "23ja", add on
text from the next segment, and call <b>pcre_exec()</b> again. Unlike
<b>pcre_dfa_exec()</b>, the entire matching string must always be available, and
the complete matching process occurs for each call, so more memory and more
processing time is needed.
</P>
<P>
<b>Note:</b> If the pattern contains lookbehind assertions, or \K, or starts
with \b or \B, the string that is returned for a partial match will include
characters that precede the partially matched string itself, because these must
be retained when adding on more characters for a subsequent matching attempt.
</P>
<br><a name="SEC9" href="#TOC1">ISSUES WITH MULTI-SEGMENT MATCHING</a><br>
<P>
Certain types of pattern may give problems with multi-segment matching,
whichever matching function is used.
</P>
<P>
1. If the pattern contains tests for the beginning or end of a line, you need
to pass the PCRE_NOTBOL or PCRE_NOTEOL options, as appropriate, when the
subject string for any call does not contain the beginning or end of a line.
</P>
<P>
2. Lookbehind assertions at the start of a pattern are catered for in the
offsets that are returned for a partial match. However, in theory, a lookbehind
assertion later in the pattern could require even earlier characters to be
inspected, and it might not have been reached when a partial match occurs. This
is probably an extremely unlikely case; you could guard against it to a certain
extent by always including extra characters at the start.
</P>
<P>
3. Matching a subject string that is split into multiple segments may not
always produce exactly the same result as matching over one single long string,
especially when PCRE_PARTIAL_SOFT is used. The section "Partial Matching and
Word Boundaries" above describes an issue that arises if the pattern ends with
\b or \B. Another kind of difference may occur when there are multiple
matching possibilities, because a partial match result is given only when there
are no completed matches. This means that as soon as the shortest match has
been found, continuation to a new subject segment is no longer possible.
Consider again this <b>pcretest</b> example:
<pre>
    re&#62; /dog(sbody)?/
  data&#62; dogsb\P
   0: dog
  data&#62; do\P\D
  Partial match: do
  data&#62; gsb\R\P\D
   0: g
  data&#62; dogsbody\D
   0: dogsbody
   1: dog
</pre>
The first data line passes the string "dogsb" to <b>pcre_exec()</b>, setting the
PCRE_PARTIAL_SOFT option. Although the string is a partial match for
"dogsbody", the result is not PCRE_ERROR_PARTIAL, because the shorter string
"dog" is a complete match. Similarly, when the subject is presented to
<b>pcre_dfa_exec()</b> in several parts ("do" and "gsb" being the first two) the
match stops when "dog" has been found, and it is not possible to continue. On
the other hand, if "dogsbody" is presented as a single string,
<b>pcre_dfa_exec()</b> finds both matches.
</P>
<P>
Because of these problems, it is probably best to use PCRE_PARTIAL_HARD when
matching multi-segment data. The example above then behaves differently:
<pre>
    re&#62; /dog(sbody)?/
  data&#62; dogsb\P\P
  Partial match: dogsb
  data&#62; do\P\D
  Partial match: do
  data&#62; gsb\R\P\P\D
  Partial match: gsb

</PRE>
</P>
<P>
4. Patterns that contain alternatives at the top level which do not all
start with the same pattern item may not work as expected when
PCRE_DFA_RESTART is used with <b>pcre_dfa_exec()</b>. For example, consider this
pattern:
<pre>
  1234|3789
</pre>
If the first part of the subject is "ABC123", a partial match of the first
alternative is found at offset 3. There is no partial match for the second
alternative, because such a match does not start at the same point in the
subject string. Attempting to continue with the string "7890" does not yield a
match because only those alternatives that match at one point in the subject
are remembered. The problem arises because the start of the second alternative
matches within the first alternative. There is no problem with anchored
patterns or patterns such as:
<pre>
  1234|ABCD
</pre>
where no string can be a partial match for both alternatives. This is not a
problem if <b>pcre_exec()</b> is used, because the entire match has to be rerun
each time:
<pre>
    re&#62; /1234|3789/
  data&#62; ABC123\P
  Partial match: 123
  data&#62; 1237890
   0: 3789
</pre>
Of course, instead of using PCRE_DFA_RESTART, the same technique of re-running
the entire match can also be used with <b>pcre_dfa_exec()</b>. Another
possibility is to work with two buffers. If a partial match at offset <i>n</i>
in the first buffer is followed by "no match" when PCRE_DFA_RESTART is used on
the second buffer, you can then try a new match starting at offset <i>n+1</i> in
the first buffer.
</P>
<br><a name="SEC10" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<br><a name="SEC11" href="#TOC1">REVISION</a><br>
<P>
Last updated: 19 October 2009
<br>
Copyright &copy; 1997-2009 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre_study.html000064400000003223150403561500016274 0ustar00<html>
<head>
<title>pcre_study specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_study man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>pcre_extra *pcre_study(const pcre *<i>code</i>, int <i>options</i>,</b>
<b>const char **<i>errptr</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function studies a compiled pattern, to see if additional information can
be extracted that might speed up matching. Its arguments are:
<pre>
  <i>code</i>       A compiled regular expression
  <i>options</i>    Options for <b>pcre_study()</b>
  <i>errptr</i>     Where to put an error message
</pre>
If the function succeeds, it returns a value that can be passed to
<b>pcre_exec()</b> via its <i>extra</i> argument.
</P>
<P>
If the function returns NULL, either it could not find any additional
information, or there was an error. You can tell the difference by looking at
the error value. It is NULL in the first case.
</P>
<P>
There are currently no options defined; the value of the second argument should
always be zero.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre_maketables.html000064400000002353150403561500017237 0ustar00<html>
<head>
<title>pcre_maketables specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_maketables man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>const unsigned char *pcre_maketables(void);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function builds a set of character tables for character values less than
256. These can be passed to <b>pcre_compile()</b> to override PCRE's internal,
built-in tables (which were made by <b>pcre_maketables()</b> when PCRE was
compiled). You might want to do this if you are using a non-standard locale.
The function yields a pointer to the tables.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre_get_stringtable_entries.html000064400000003246150403561500022037 0ustar00<html>
<head>
<title>pcre_get_stringtable_entries specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_get_stringtable_entries man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>int pcre_get_stringtable_entries(const pcre *<i>code</i>,</b>
<b>const char *<i>name</i>, char **<i>first</i>, char **<i>last</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This convenience function finds, for a compiled pattern, the first and last
entries for a given name in the table that translates capturing parenthesis
names into numbers. When names are required to be unique (PCRE_DUPNAMES is
<i>not</i> set), it is usually easier to use <b>pcre_get_stringnumber()</b>
instead.
<pre>
  <i>code</i>    Compiled regular expression
  <i>name</i>    Name whose entries required
  <i>first</i>   Where to return a pointer to the first entry
  <i>last</i>    Where to return a pointer to the last entry
</pre>
The yield of the function is the length of each entry, or
PCRE_ERROR_NOSUBSTRING if none are found.
</P>
<P>
There is a complete description of the PCRE native API, including the format of
the table entries, in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page, and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre_config.html000064400000005012150403561510016370 0ustar00<html>
<head>
<title>pcre_config specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_config man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>int pcre_config(int <i>what</i>, void *<i>where</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function makes it possible for a client program to find out which optional
features are available in the version of the PCRE library it is using. Its
arguments are as follows:
<pre>
  <i>what</i>     A code specifying what information is required
  <i>where</i>    Points to where to put the data
</pre>
The available codes are:
<pre>
  PCRE_CONFIG_LINK_SIZE     Internal link size: 2, 3, or 4
  PCRE_CONFIG_MATCH_LIMIT   Internal resource limit
  PCRE_CONFIG_MATCH_LIMIT_RECURSION
                            Internal recursion depth limit
  PCRE_CONFIG_NEWLINE       Value of the default newline sequence:
                                13 (0x000d)    for CR
                                10 (0x000a)    for LF
                              3338 (0x0d0a)    for CRLF
                                -2             for ANYCRLF
                                -1             for ANY
  PCRE_CONFIG_BSR           Indicates what \R matches by default:
                                 0             all Unicode line endings
                                 1             CR, LF, or CRLF only
  PCRE_CONFIG_POSIX_MALLOC_THRESHOLD
                            Threshold of return slots, above
                              which <b>malloc()</b> is used by
                              the POSIX API
  PCRE_CONFIG_STACKRECURSE  Recursion implementation (1=stack 0=heap)
  PCRE_CONFIG_UTF8          Availability of UTF-8 support (1=yes 0=no)
  PCRE_CONFIG_UNICODE_PROPERTIES
                            Availability of Unicode property support
                              (1=yes 0=no)
</pre>
The function yields 0 on success or PCRE_ERROR_BADOPTION otherwise.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcrecallout.html000064400000021204150403561510016430 0ustar00<html>
<head>
<title>pcrecallout specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcrecallout man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">PCRE CALLOUTS</a>
<li><a name="TOC2" href="#SEC2">MISSING CALLOUTS</a>
<li><a name="TOC3" href="#SEC3">THE CALLOUT INTERFACE</a>
<li><a name="TOC4" href="#SEC4">RETURN VALUES</a>
<li><a name="TOC5" href="#SEC5">AUTHOR</a>
<li><a name="TOC6" href="#SEC6">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">PCRE CALLOUTS</a><br>
<P>
<b>int (*pcre_callout)(pcre_callout_block *);</b>
</P>
<P>
PCRE provides a feature called "callout", which is a means of temporarily
passing control to the caller of PCRE in the middle of pattern matching. The
caller of PCRE provides an external function by putting its entry point in the
global variable <i>pcre_callout</i>. By default, this variable contains NULL,
which disables all calling out.
</P>
<P>
Within a regular expression, (?C) indicates the points at which the external
function is to be called. Different callout points can be identified by putting
a number less than 256 after the letter C. The default value is zero.
For example, this pattern has two callout points:
<pre>
  (?C1)abc(?C2)def
</pre>
If the PCRE_AUTO_CALLOUT option bit is set when <b>pcre_compile()</b> or
<b>pcre_compile2()</b> is called, PCRE automatically inserts callouts, all with
number 255, before each item in the pattern. For example, if PCRE_AUTO_CALLOUT
is used with the pattern
<pre>
  A(\d{2}|--)
</pre>
it is processed as if it were
<br>
<br>
(?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)
<br>
<br>
Notice that there is a callout before and after each parenthesis and
alternation bar. Automatic callouts can be used for tracking the progress of
pattern matching. The
<a href="pcretest.html"><b>pcretest</b></a>
command has an option that sets automatic callouts; when it is used, the output
indicates how the pattern is matched. This is useful information when you are
trying to optimize the performance of a particular pattern.
</P>
<br><a name="SEC2" href="#TOC1">MISSING CALLOUTS</a><br>
<P>
You should be aware that, because of optimizations in the way PCRE matches
patterns by default, callouts sometimes do not happen. For example, if the
pattern is
<pre>
  ab(?C4)cd
</pre>
PCRE knows that any matching string must contain the letter "d". If the subject
string is "abyz", the lack of "d" means that matching doesn't ever start, and
the callout is never reached. However, with "abyd", though the result is still
no match, the callout is obeyed.
</P>
<P>
If the pattern is studied, PCRE knows the minimum length of a matching string,
and will immediately give a "no match" return without actually running a match
if the subject is not long enough, or, for unanchored patterns, if it has
been scanned far enough.
</P>
<P>
You can disable these optimizations by passing the PCRE_NO_START_OPTIMIZE
option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>. This slows down the
matching process, but does ensure that callouts such as the example above are
obeyed.
</P>
<br><a name="SEC3" href="#TOC1">THE CALLOUT INTERFACE</a><br>
<P>
During matching, when PCRE reaches a callout point, the external function
defined by <i>pcre_callout</i> is called (if it is set). This applies to both
the <b>pcre_exec()</b> and the <b>pcre_dfa_exec()</b> matching functions. The
only argument to the callout function is a pointer to a <b>pcre_callout</b>
block. This structure contains the following fields:
<pre>
  int          <i>version</i>;
  int          <i>callout_number</i>;
  int         *<i>offset_vector</i>;
  const char  *<i>subject</i>;
  int          <i>subject_length</i>;
  int          <i>start_match</i>;
  int          <i>current_position</i>;
  int          <i>capture_top</i>;
  int          <i>capture_last</i>;
  void        *<i>callout_data</i>;
  int          <i>pattern_position</i>;
  int          <i>next_item_length</i>;
</pre>
The <i>version</i> field is an integer containing the version number of the
block format. The initial version was 0; the current version is 1. The version
number will change again in future if additional fields are added, but the
intention is never to remove any of the existing fields.
</P>
<P>
The <i>callout_number</i> field contains the number of the callout, as compiled
into the pattern (that is, the number after ?C for manual callouts, and 255 for
automatically generated callouts).
</P>
<P>
The <i>offset_vector</i> field is a pointer to the vector of offsets that was
passed by the caller to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>. When
<b>pcre_exec()</b> is used, the contents can be inspected in order to extract
substrings that have been matched so far, in the same way as for extracting
substrings after a match has completed. For <b>pcre_dfa_exec()</b> this field is
not useful.
</P>
<P>
The <i>subject</i> and <i>subject_length</i> fields contain copies of the values
that were passed to <b>pcre_exec()</b>.
</P>
<P>
The <i>start_match</i> field normally contains the offset within the subject at
which the current match attempt started. However, if the escape sequence \K
has been encountered, this value is changed to reflect the modified starting
point. If the pattern is not anchored, the callout function may be called
several times from the same point in the pattern for different starting points
in the subject.
</P>
<P>
The <i>current_position</i> field contains the offset within the subject of the
current match pointer.
</P>
<P>
When the <b>pcre_exec()</b> function is used, the <i>capture_top</i> field
contains one more than the number of the highest numbered captured substring so
far. If no substrings have been captured, the value of <i>capture_top</i> is
one. This is always the case when <b>pcre_dfa_exec()</b> is used, because it
does not support captured substrings.
</P>
<P>
The <i>capture_last</i> field contains the number of the most recently captured
substring. If no substrings have been captured, its value is -1. This is always
the case when <b>pcre_dfa_exec()</b> is used.
</P>
<P>
The <i>callout_data</i> field contains a value that is passed to
<b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> specifically so that it can be
passed back in callouts. It is passed in the <i>callout_data</i> field of the
<b>pcre_extra</b> data structure. If no such data was passed, the value of
<i>callout_data</i> in a <b>pcre_callout</b> block is NULL. There is a
description of the <b>pcre_extra</b> structure in the
<a href="pcreapi.html"><b>pcreapi</b></a>
documentation.
</P>
<P>
The <i>pattern_position</i> field is present from version 1 of the
<i>pcre_callout</i> structure. It contains the offset to the next item to be
matched in the pattern string.
</P>
<P>
The <i>next_item_length</i> field is present from version 1 of the
<i>pcre_callout</i> structure. It contains the length of the next item to be
matched in the pattern string. When the callout immediately precedes an
alternation bar, a closing parenthesis, or the end of the pattern, the length
is zero. When the callout precedes an opening parenthesis, the length is that
of the entire subpattern.
</P>
<P>
The <i>pattern_position</i> and <i>next_item_length</i> fields are intended to
help in distinguishing between different automatic callouts, which all have the
same callout number. However, they are set for all callouts.
</P>
<br><a name="SEC4" href="#TOC1">RETURN VALUES</a><br>
<P>
The external callout function returns an integer to PCRE. If the value is zero,
matching proceeds as normal. If the value is greater than zero, matching fails
at the current point, but the testing of other matching possibilities goes
ahead, just as if a lookahead assertion had failed. If the value is less than
zero, the match is abandoned, and <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
returns the negative value.
</P>
<P>
Negative values should normally be chosen from the set of PCRE_ERROR_xxx
values. In particular, PCRE_ERROR_NOMATCH forces a standard "no match" failure.
The error number PCRE_ERROR_CALLOUT is reserved for use by callout functions;
it will never be used by PCRE itself.
</P>
<br><a name="SEC5" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<br><a name="SEC6" href="#TOC1">REVISION</a><br>
<P>
Last updated: 29 September 2009
<br>
Copyright &copy; 1997-2009 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre_compile2.html000064400000007051150403561510016642 0ustar00<html>
<head>
<title>pcre_compile2 specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_compile2 man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>pcre *pcre_compile2(const char *<i>pattern</i>, int <i>options</i>,</b>
<b>int *<i>errorcodeptr</i>,</b>
<b>const char **<i>errptr</i>, int *<i>erroffset</i>,</b>
<b>const unsigned char *<i>tableptr</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function compiles a regular expression into an internal form. It is the
same as <b>pcre_compile()</b>, except for the addition of the <i>errorcodeptr</i>
argument. The arguments are:
</P>
<P>
<pre>
  <i>pattern</i>       A zero-terminated string containing the
                  regular expression to be compiled
  <i>options</i>       Zero or more option bits
  <i>errorcodeptr</i>  Where to put an error code
  <i>errptr</i>        Where to put an error message
  <i>erroffset</i>     Offset in pattern where error was found
  <i>tableptr</i>      Pointer to character tables, or NULL to
                  use the built-in default
</pre>
The option bits are:
<pre>
  PCRE_ANCHORED           Force pattern anchoring
  PCRE_AUTO_CALLOUT       Compile automatic callouts
  PCRE_BSR_ANYCRLF        \R matches only CR, LF, or CRLF
  PCRE_BSR_UNICODE        \R matches all Unicode line endings
  PCRE_CASELESS           Do caseless matching
  PCRE_DOLLAR_ENDONLY     $ not to match newline at end
  PCRE_DOTALL             . matches anything including NL
  PCRE_DUPNAMES           Allow duplicate names for subpatterns
  PCRE_EXTENDED           Ignore whitespace and # comments
  PCRE_EXTRA              PCRE extra features
                            (not much use currently)
  PCRE_FIRSTLINE          Force matching to be before newline
  PCRE_JAVASCRIPT_COMPAT  JavaScript compatibility
  PCRE_MULTILINE          ^ and $ match newlines within data
  PCRE_NEWLINE_ANY        Recognize any Unicode newline sequence
  PCRE_NEWLINE_ANYCRLF    Recognize CR, LF, and CRLF as newline
                            sequences
  PCRE_NEWLINE_CR         Set CR as the newline sequence
  PCRE_NEWLINE_CRLF       Set CRLF as the newline sequence
  PCRE_NEWLINE_LF         Set LF as the newline sequence
  PCRE_NO_AUTO_CAPTURE    Disable numbered capturing paren-
                            theses (named ones available)
  PCRE_NO_UTF8_CHECK      Do not check the pattern for UTF-8
                            validity (only relevant if
                            PCRE_UTF8 is set)
  PCRE_UNGREEDY           Invert greediness of quantifiers
  PCRE_UTF8               Run in UTF-8 mode
</pre>
PCRE must be built with UTF-8 support in order to use PCRE_UTF8 and
PCRE_NO_UTF8_CHECK.
</P>
<P>
The yield of the function is a pointer to a private data structure that
contains the compiled pattern, or NULL if an error was detected. Note that
compiling regular expressions with one version of PCRE for use with a different
version is not guaranteed to work and may cause crashes.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre_info.html000064400000001775150403561510016072 0ustar00<html>
<head>
<title>pcre_info specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_info man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>int pcre_info(const pcre *<i>code</i>, int *<i>optptr</i>, int</b>
<b>*<i>firstcharptr</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is obsolete. You should be using <b>pcre_fullinfo()</b> instead.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcretest.html000064400000074474150403561520015766 0ustar00<html>
<head>
<title>pcretest specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcretest man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">SYNOPSIS</a>
<li><a name="TOC2" href="#SEC2">OPTIONS</a>
<li><a name="TOC3" href="#SEC3">DESCRIPTION</a>
<li><a name="TOC4" href="#SEC4">PATTERN MODIFIERS</a>
<li><a name="TOC5" href="#SEC5">DATA LINES</a>
<li><a name="TOC6" href="#SEC6">THE ALTERNATIVE MATCHING FUNCTION</a>
<li><a name="TOC7" href="#SEC7">DEFAULT OUTPUT FROM PCRETEST</a>
<li><a name="TOC8" href="#SEC8">OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION</a>
<li><a name="TOC9" href="#SEC9">RESTARTING AFTER A PARTIAL MATCH</a>
<li><a name="TOC10" href="#SEC10">CALLOUTS</a>
<li><a name="TOC11" href="#SEC11">NON-PRINTING CHARACTERS</a>
<li><a name="TOC12" href="#SEC12">SAVING AND RELOADING COMPILED PATTERNS</a>
<li><a name="TOC13" href="#SEC13">SEE ALSO</a>
<li><a name="TOC14" href="#SEC14">AUTHOR</a>
<li><a name="TOC15" href="#SEC15">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">SYNOPSIS</a><br>
<P>
<b>pcretest [options] [source] [destination]</b>
<br>
<br>
<b>pcretest</b> was written as a test program for the PCRE regular expression
library itself, but it can also be used for experimenting with regular
expressions. This document describes the features of the test program; for
details of the regular expressions themselves, see the
<a href="pcrepattern.html"><b>pcrepattern</b></a>
documentation. For details of the PCRE library function calls and their
options, see the
<a href="pcreapi.html"><b>pcreapi</b></a>
documentation.
</P>
<br><a name="SEC2" href="#TOC1">OPTIONS</a><br>
<P>
<b>-b</b>
Behave as if each regex has the <b>/B</b> (show bytecode) modifier; the internal
form is output after compilation.
</P>
<P>
<b>-C</b>
Output the version number of the PCRE library, and all available information
about the optional features that are included, and then exit.
</P>
<P>
<b>-d</b>
Behave as if each regex has the <b>/D</b> (debug) modifier; the internal
form and information about the compiled pattern is output after compilation;
<b>-d</b> is equivalent to <b>-b -i</b>.
</P>
<P>
<b>-dfa</b>
Behave as if each data line contains the \D escape sequence; this causes the
alternative matching function, <b>pcre_dfa_exec()</b>, to be used instead of the
standard <b>pcre_exec()</b> function (more detail is given below).
</P>
<P>
<b>-help</b>
Output a brief summary of these options and then exit.
</P>
<P>
<b>-i</b>
Behave as if each regex has the <b>/I</b> modifier; information about the
compiled pattern is given after compilation.
</P>
<P>
<b>-M</b>
Behave as if each data line contains the \M escape sequence; this causes
PCRE to discover the minimum MATCH_LIMIT and MATCH_LIMIT_RECURSION settings by
calling <b>pcre_exec()</b> repeatedly with different limits.
</P>
<P>
<b>-m</b>
Output the size of each compiled pattern after it has been compiled. This is
equivalent to adding <b>/M</b> to each regular expression. For compatibility
with earlier versions of pcretest, <b>-s</b> is a synonym for <b>-m</b>.
</P>
<P>
<b>-o</b> <i>osize</i>
Set the number of elements in the output vector that is used when calling
<b>pcre_exec()</b> or <b>pcre_dfa_exec()</b> to be <i>osize</i>. The default value
is 45, which is enough for 14 capturing subexpressions for <b>pcre_exec()</b> or
22 different matches for <b>pcre_dfa_exec()</b>. The vector size can be
changed for individual matching calls by including \O in the data line (see
below).
</P>
<P>
<b>-p</b>
Behave as if each regex has the <b>/P</b> modifier; the POSIX wrapper API is
used to call PCRE. None of the other options has any effect when <b>-p</b> is
set.
</P>
<P>
<b>-q</b>
Do not output the version number of <b>pcretest</b> at the start of execution.
</P>
<P>
<b>-S</b> <i>size</i>
On Unix-like systems, set the size of the runtime stack to <i>size</i>
megabytes.
</P>
<P>
<b>-t</b>
Run each compile, study, and match many times with a timer, and output
resulting time per compile or match (in milliseconds). Do not set <b>-m</b> with
<b>-t</b>, because you will then get the size output a zillion times, and the
timing will be distorted. You can control the number of iterations that are
used for timing by following <b>-t</b> with a number (as a separate item on the
command line). For example, "-t 1000" would iterate 1000 times. The default is
to iterate 500000 times.
</P>
<P>
<b>-tm</b>
This is like <b>-t</b> except that it times only the matching phase, not the
compile or study phases.
</P>
<br><a name="SEC3" href="#TOC1">DESCRIPTION</a><br>
<P>
If <b>pcretest</b> is given two filename arguments, it reads from the first and
writes to the second. If it is given only one filename argument, it reads from
that file and writes to stdout. Otherwise, it reads from stdin and writes to
stdout, and prompts for each line of input, using "re&#62;" to prompt for regular
expressions, and "data&#62;" to prompt for data lines.
</P>
<P>
When <b>pcretest</b> is built, a configuration option can specify that it should
be linked with the <b>libreadline</b> library. When this is done, if the input
is from a terminal, it is read using the <b>readline()</b> function. This
provides line-editing and history facilities. The output from the <b>-help</b>
option states whether or not <b>readline()</b> will be used.
</P>
<P>
The program handles any number of sets of input on a single input file. Each
set starts with a regular expression, and continues with any number of data
lines to be matched against the pattern.
</P>
<P>
Each data line is matched separately and independently. If you want to do
multi-line matches, you have to use the \n escape sequence (or \r or \r\n,
etc., depending on the newline setting) in a single line of input to encode the
newline sequences. There is no limit on the length of data lines; the input
buffer is automatically extended if it is too small.
</P>
<P>
An empty line signals the end of the data lines, at which point a new regular
expression is read. The regular expressions are given enclosed in any
non-alphanumeric delimiters other than backslash, for example:
<pre>
  /(a|bc)x+yz/
</pre>
White space before the initial delimiter is ignored. A regular expression may
be continued over several input lines, in which case the newline characters are
included within it. It is possible to include the delimiter within the pattern
by escaping it, for example
<pre>
  /abc\/def/
</pre>
If you do so, the escape and the delimiter form part of the pattern, but since
delimiters are always non-alphanumeric, this does not affect its interpretation.
If the terminating delimiter is immediately followed by a backslash, for
example,
<pre>
  /abc/\
</pre>
then a backslash is added to the end of the pattern. This is done to provide a
way of testing the error condition that arises if a pattern finishes with a
backslash, because
<pre>
  /abc\/
</pre>
is interpreted as the first line of a pattern that starts with "abc/", causing
pcretest to read the next line as a continuation of the regular expression.
</P>
<br><a name="SEC4" href="#TOC1">PATTERN MODIFIERS</a><br>
<P>
A pattern may be followed by any number of modifiers, which are mostly single
characters. Following Perl usage, these are referred to below as, for example,
"the <b>/i</b> modifier", even though the delimiter of the pattern need not
always be a slash, and no slash is used when writing modifiers. Whitespace may
appear between the final pattern delimiter and the first modifier, and between
the modifiers themselves.
</P>
<P>
The <b>/i</b>, <b>/m</b>, <b>/s</b>, and <b>/x</b> modifiers set the PCRE_CASELESS,
PCRE_MULTILINE, PCRE_DOTALL, or PCRE_EXTENDED options, respectively, when
<b>pcre_compile()</b> is called. These four modifier letters have the same
effect as they do in Perl. For example:
<pre>
  /caseless/i
</pre>
The following table shows additional modifiers for setting PCRE options that do
not correspond to anything in Perl:
<pre>
  <b>/A</b>              PCRE_ANCHORED
  <b>/C</b>              PCRE_AUTO_CALLOUT
  <b>/E</b>              PCRE_DOLLAR_ENDONLY
  <b>/f</b>              PCRE_FIRSTLINE
  <b>/J</b>              PCRE_DUPNAMES
  <b>/N</b>              PCRE_NO_AUTO_CAPTURE
  <b>/U</b>              PCRE_UNGREEDY
  <b>/X</b>              PCRE_EXTRA
  <b>/&#60;JS&#62;</b>           PCRE_JAVASCRIPT_COMPAT
  <b>/&#60;cr&#62;</b>           PCRE_NEWLINE_CR
  <b>/&#60;lf&#62;</b>           PCRE_NEWLINE_LF
  <b>/&#60;crlf&#62;</b>         PCRE_NEWLINE_CRLF
  <b>/&#60;anycrlf&#62;</b>      PCRE_NEWLINE_ANYCRLF
  <b>/&#60;any&#62;</b>          PCRE_NEWLINE_ANY
  <b>/&#60;bsr_anycrlf&#62;</b>  PCRE_BSR_ANYCRLF
  <b>/&#60;bsr_unicode&#62;</b>  PCRE_BSR_UNICODE
</pre>
Those specifying line ending sequences are literal strings as shown, but the
letters can be in either case. This example sets multiline matching with CRLF
as the line ending sequence:
<pre>
  /^abc/m&#60;crlf&#62;
</pre>
Details of the meanings of these PCRE options are given in the
<a href="pcreapi.html"><b>pcreapi</b></a>
documentation.
</P>
<br><b>
Finding all matches in a string
</b><br>
<P>
Searching for all possible matches within each subject string can be requested
by the <b>/g</b> or <b>/G</b> modifier. After finding a match, PCRE is called
again to search the remainder of the subject string. The difference between
<b>/g</b> and <b>/G</b> is that the former uses the <i>startoffset</i> argument to
<b>pcre_exec()</b> to start searching at a new point within the entire string
(which is in effect what Perl does), whereas the latter passes over a shortened
substring. This makes a difference to the matching process if the pattern
begins with a lookbehind assertion (including \b or \B).
</P>
<P>
If any call to <b>pcre_exec()</b> in a <b>/g</b> or <b>/G</b> sequence matches an
empty string, the next call is done with the PCRE_NOTEMPTY_ATSTART and
PCRE_ANCHORED flags set in order to search for another, non-empty, match at the
same point. If this second match fails, the start offset is advanced by one
character, and the normal match is retried. This imitates the way Perl handles
such cases when using the <b>/g</b> modifier or the <b>split()</b> function.
</P>
<br><b>
Other modifiers
</b><br>
<P>
There are yet more modifiers for controlling the way <b>pcretest</b>
operates.
</P>
<P>
The <b>/+</b> modifier requests that as well as outputting the substring that
matched the entire pattern, pcretest should in addition output the remainder of
the subject string. This is useful for tests where the subject contains
multiple copies of the same substring.
</P>
<P>
The <b>/B</b> modifier is a debugging feature. It requests that <b>pcretest</b>
output a representation of the compiled byte code after compilation. Normally
this information contains length and offset values; however, if <b>/Z</b> is
also present, this data is replaced by spaces. This is a special feature for
use in the automatic test scripts; it ensures that the same output is generated
for different internal link sizes.
</P>
<P>
The <b>/L</b> modifier must be followed directly by the name of a locale, for
example,
<pre>
  /pattern/Lfr_FR
</pre>
For this reason, it must be the last modifier. The given locale is set,
<b>pcre_maketables()</b> is called to build a set of character tables for the
locale, and this is then passed to <b>pcre_compile()</b> when compiling the
regular expression. Without an <b>/L</b> modifier, NULL is passed as the tables
pointer; that is, <b>/L</b> applies only to the expression on which it appears.
</P>
<P>
The <b>/I</b> modifier requests that <b>pcretest</b> output information about the
compiled pattern (whether it is anchored, has a fixed first character, and
so on). It does this by calling <b>pcre_fullinfo()</b> after compiling a
pattern. If the pattern is studied, the results of that are also output.
</P>
<P>
The <b>/D</b> modifier is a PCRE debugging feature, and is equivalent to
<b>/BI</b>, that is, both the <b>/B</b> and the <b>/I</b> modifiers.
</P>
<P>
The <b>/F</b> modifier causes <b>pcretest</b> to flip the byte order of the
fields in the compiled pattern that contain 2-byte and 4-byte numbers. This
facility is for testing the feature in PCRE that allows it to execute patterns
that were compiled on a host with a different endianness. This feature is not
available when the POSIX interface to PCRE is being used, that is, when the
<b>/P</b> pattern modifier is specified. See also the section about saving and
reloading compiled patterns below.
</P>
<P>
The <b>/S</b> modifier causes <b>pcre_study()</b> to be called after the
expression has been compiled, and the results used when the expression is
matched.
</P>
<P>
The <b>/M</b> modifier causes the size of memory block used to hold the compiled
pattern to be output.
</P>
<P>
The <b>/P</b> modifier causes <b>pcretest</b> to call PCRE via the POSIX wrapper
API rather than its native API. When this is done, all other modifiers except
<b>/i</b>, <b>/m</b>, and <b>/+</b> are ignored. REG_ICASE is set if <b>/i</b> is
present, and REG_NEWLINE is set if <b>/m</b> is present. The wrapper functions
force PCRE_DOLLAR_ENDONLY always, and PCRE_DOTALL unless REG_NEWLINE is set.
</P>
<P>
The <b>/8</b> modifier causes <b>pcretest</b> to call PCRE with the PCRE_UTF8
option set. This turns on support for UTF-8 character handling in PCRE,
provided that it was compiled with this support enabled. This modifier also
causes any non-printing characters in output strings to be printed using the
\x{hh...} notation if they are valid UTF-8 sequences.
</P>
<P>
If the <b>/?</b> modifier is used with <b>/8</b>, it causes <b>pcretest</b> to
call <b>pcre_compile()</b> with the PCRE_NO_UTF8_CHECK option, to suppress the
checking of the string for UTF-8 validity.
</P>
<br><a name="SEC5" href="#TOC1">DATA LINES</a><br>
<P>
Before each data line is passed to <b>pcre_exec()</b>, leading and trailing
whitespace is removed, and it is then scanned for \ escapes. Some of these are
pretty esoteric features, intended for checking out some of the more
complicated features of PCRE. If you are just testing "ordinary" regular
expressions, you probably don't need any of these. The following escapes are
recognized:
<pre>
  \a         alarm (BEL, \x07)
  \b         backspace (\x08)
  \e         escape (\x1b)
  \f         formfeed (\x0c)
  \n         newline (\x0a)
  \qdd       set the PCRE_MATCH_LIMIT limit to dd (any number of digits)
  \r         carriage return (\x0d)
  \t         tab (\x09)
  \v         vertical tab (\x0b)
  \nnn       octal character (up to 3 octal digits)
  \xhh       hexadecimal character (up to 2 hex digits)
  \x{hh...}  hexadecimal character, any number of digits in UTF-8 mode
  \A         pass the PCRE_ANCHORED option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
  \B         pass the PCRE_NOTBOL option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
  \Cdd       call pcre_copy_substring() for substring dd after a successful match (number less than 32)
  \Cname     call pcre_copy_named_substring() for substring "name" after a successful match (name termin-
               ated by next non-alphanumeric character)
  \C+        show the current captured substrings at callout time
  \C-        do not supply a callout function
  \C!n       return 1 instead of 0 when callout number n is reached
  \C!n!m     return 1 instead of 0 when callout number n is reached for the mth time
  \C*n       pass the number n (may be negative) as callout data; this is used as the callout return value
  \D         use the <b>pcre_dfa_exec()</b> match function
  \F         only shortest match for <b>pcre_dfa_exec()</b>
  \Gdd       call pcre_get_substring() for substring dd after a successful match (number less than 32)
  \Gname     call pcre_get_named_substring() for substring "name" after a successful match (name termin-
               ated by next non-alphanumeric character)
  \L         call pcre_get_substringlist() after a successful match
  \M         discover the minimum MATCH_LIMIT and MATCH_LIMIT_RECURSION settings
  \N         pass the PCRE_NOTEMPTY option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>; if used twice, pass the
               PCRE_NOTEMPTY_ATSTART option
  \Odd       set the size of the output vector passed to <b>pcre_exec()</b> to dd (any number of digits)
  \P         pass the PCRE_PARTIAL_SOFT option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>; if used twice, pass the
               PCRE_PARTIAL_HARD option
  \Qdd       set the PCRE_MATCH_LIMIT_RECURSION limit to dd (any number of digits)
  \R         pass the PCRE_DFA_RESTART option to <b>pcre_dfa_exec()</b>
  \S         output details of memory get/free calls during matching
  \Y         pass the PCRE_NO_START_OPTIMIZE option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
  \Z         pass the PCRE_NOTEOL option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
  \?         pass the PCRE_NO_UTF8_CHECK option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
  \&#62;dd       start the match at offset dd (any number of digits);
               this sets the <i>startoffset</i> argument for <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
  \&#60;cr&#62;      pass the PCRE_NEWLINE_CR option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
  \&#60;lf&#62;      pass the PCRE_NEWLINE_LF option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
  \&#60;crlf&#62;    pass the PCRE_NEWLINE_CRLF option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
  \&#60;anycrlf&#62; pass the PCRE_NEWLINE_ANYCRLF option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
  \&#60;any&#62;     pass the PCRE_NEWLINE_ANY option to <b>pcre_exec()</b> or <b>pcre_dfa_exec()</b>
</pre>
The escapes that specify line ending sequences are literal strings, exactly as
shown. No more than one newline setting should be present in any data line.
</P>
<P>
A backslash followed by anything else just escapes the anything else. If
the very last character is a backslash, it is ignored. This gives a way of
passing an empty line as data, since a real empty line terminates the data
input.
</P>
<P>
If \M is present, <b>pcretest</b> calls <b>pcre_exec()</b> several times, with
different values in the <i>match_limit</i> and <i>match_limit_recursion</i>
fields of the <b>pcre_extra</b> data structure, until it finds the minimum
numbers for each parameter that allow <b>pcre_exec()</b> to complete. The
<i>match_limit</i> number is a measure of the amount of backtracking that takes
place, and checking it out can be instructive. For most simple matches, the
number is quite small, but for patterns with very large numbers of matching
possibilities, it can become large very quickly with increasing length of
subject string. The <i>match_limit_recursion</i> number is a measure of how much
stack (or, if PCRE is compiled with NO_RECURSE, how much heap) memory is needed
to complete the match attempt.
</P>
<P>
When \O is used, the value specified may be higher or lower than the size set
by the <b>-o</b> command line option (or defaulted to 45); \O applies only to
the call of <b>pcre_exec()</b> for the line in which it appears.
</P>
<P>
If the <b>/P</b> modifier was present on the pattern, causing the POSIX wrapper
API to be used, the only option-setting sequences that have any effect are \B
and \Z, causing REG_NOTBOL and REG_NOTEOL, respectively, to be passed to
<b>regexec()</b>.
</P>
<P>
The use of \x{hh...} to represent UTF-8 characters is not dependent on the use
of the <b>/8</b> modifier on the pattern. It is recognized always. There may be
any number of hexadecimal digits inside the braces. The result is from one to
six bytes, encoded according to the original UTF-8 rules of RFC 2279. This
allows for values in the range 0 to 0x7FFFFFFF. Note that not all of those are
valid Unicode code points, or indeed valid UTF-8 characters according to the
later rules in RFC 3629.
</P>
<br><a name="SEC6" href="#TOC1">THE ALTERNATIVE MATCHING FUNCTION</a><br>
<P>
By default, <b>pcretest</b> uses the standard PCRE matching function,
<b>pcre_exec()</b> to match each data line. From release 6.0, PCRE supports an
alternative matching function, <b>pcre_dfa_exec()</b>, which operates in a
different way, and has some restrictions. The differences between the two
functions are described in the
<a href="pcrematching.html"><b>pcrematching</b></a>
documentation.
</P>
<P>
If a data line contains the \D escape sequence, or if the command line
contains the <b>-dfa</b> option, the alternative matching function is called.
This function finds all possible matches at a given point. If, however, the \F
escape sequence is present in the data line, it stops after the first match is
found. This is always the shortest possible match.
</P>
<br><a name="SEC7" href="#TOC1">DEFAULT OUTPUT FROM PCRETEST</a><br>
<P>
This section describes the output when the normal matching function,
<b>pcre_exec()</b>, is being used.
</P>
<P>
When a match succeeds, pcretest outputs the list of captured substrings that
<b>pcre_exec()</b> returns, starting with number 0 for the string that matched
the whole pattern. Otherwise, it outputs "No match" when the return is
PCRE_ERROR_NOMATCH, and "Partial match:" followed by the partially matching
substring when <b>pcre_exec()</b> returns PCRE_ERROR_PARTIAL. For any other
returns, it outputs the PCRE negative error number. Here is an example of an
interactive <b>pcretest</b> run.
<pre>
  $ pcretest
  PCRE version 7.0 30-Nov-2006

    re&#62; /^abc(\d+)/
  data&#62; abc123
   0: abc123
   1: 123
  data&#62; xyz
  No match
</pre>
Note that unset capturing substrings that are not followed by one that is set
are not returned by <b>pcre_exec()</b>, and are not shown by <b>pcretest</b>. In
the following example, there are two capturing substrings, but when the first
data line is matched, the second, unset substring is not shown. An "internal"
unset substring is shown as "&#60;unset&#62;", as for the second data line.
<pre>
    re&#62; /(a)|(b)/
  data&#62; a
   0: a
   1: a
  data&#62; b
   0: b
   1: &#60;unset&#62;
   2: b
</pre>
If the strings contain any non-printing characters, they are output as \xhh
escapes, or as \x{...} escapes if the <b>/8</b> modifier was present on the
pattern. See below for the definition of non-printing characters. If the
pattern has the <b>/+</b> modifier, the output for substring 0 is followed by
the rest of the subject string, identified by "0+" like this:
<pre>
    re&#62; /cat/+
  data&#62; cataract
   0: cat
   0+ aract
</pre>
If the pattern has the <b>/g</b> or <b>/G</b> modifier, the results of successive
matching attempts are output in sequence, like this:
<pre>
    re&#62; /\Bi(\w\w)/g
  data&#62; Mississippi
   0: iss
   1: ss
   0: iss
   1: ss
   0: ipp
   1: pp
</pre>
"No match" is output only if the first match attempt fails.
</P>
<P>
If any of the sequences <b>\C</b>, <b>\G</b>, or <b>\L</b> are present in a
data line that is successfully matched, the substrings extracted by the
convenience functions are output with C, G, or L after the string number
instead of a colon. This is in addition to the normal full list. The string
length (that is, the return from the extraction function) is given in
parentheses after each string for <b>\C</b> and <b>\G</b>.
</P>
<P>
Note that whereas patterns can be continued over several lines (a plain "&#62;"
prompt is used for continuations), data lines may not. However newlines can be
included in data by means of the \n escape (or \r, \r\n, etc., depending on
the newline sequence setting).
</P>
<br><a name="SEC8" href="#TOC1">OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION</a><br>
<P>
When the alternative matching function, <b>pcre_dfa_exec()</b>, is used (by
means of the \D escape sequence or the <b>-dfa</b> command line option), the
output consists of a list of all the matches that start at the first point in
the subject where there is at least one match. For example:
<pre>
    re&#62; /(tang|tangerine|tan)/
  data&#62; yellow tangerine\D
   0: tangerine
   1: tang
   2: tan
</pre>
(Using the normal matching function on this data finds only "tang".) The
longest matching string is always given first (and numbered zero). After a
PCRE_ERROR_PARTIAL return, the output is "Partial match:", followed by the
partially matching substring.
</P>
<P>
If <b>/g</b> is present on the pattern, the search for further matches resumes
at the end of the longest match. For example:
<pre>
    re&#62; /(tang|tangerine|tan)/g
  data&#62; yellow tangerine and tangy sultana\D
   0: tangerine
   1: tang
   2: tan
   0: tang
   1: tan
   0: tan
</pre>
Since the matching function does not support substring capture, the escape
sequences that are concerned with captured substrings are not relevant.
</P>
<br><a name="SEC9" href="#TOC1">RESTARTING AFTER A PARTIAL MATCH</a><br>
<P>
When the alternative matching function has given the PCRE_ERROR_PARTIAL return,
indicating that the subject partially matched the pattern, you can restart the
match with additional subject data by means of the \R escape sequence. For
example:
<pre>
    re&#62; /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
  data&#62; 23ja\P\D
  Partial match: 23ja
  data&#62; n05\R\D
   0: n05
</pre>
For further information about partial matching, see the
<a href="pcrepartial.html"><b>pcrepartial</b></a>
documentation.
</P>
<br><a name="SEC10" href="#TOC1">CALLOUTS</a><br>
<P>
If the pattern contains any callout requests, <b>pcretest</b>'s callout function
is called during matching. This works with both matching functions. By default,
the called function displays the callout number, the start and current
positions in the text at the callout time, and the next pattern item to be
tested. For example, the output
<pre>
  ---&#62;pqrabcdef
    0    ^  ^     \d
</pre>
indicates that callout number 0 occurred for a match attempt starting at the
fourth character of the subject string, when the pointer was at the seventh
character of the data, and when the next pattern item was \d. Just one
circumflex is output if the start and current positions are the same.
</P>
<P>
Callouts numbered 255 are assumed to be automatic callouts, inserted as a
result of the <b>/C</b> pattern modifier. In this case, instead of showing the
callout number, the offset in the pattern, preceded by a plus, is output. For
example:
<pre>
    re&#62; /\d?[A-E]\*/C
  data&#62; E*
  ---&#62;E*
   +0 ^      \d?
   +3 ^      [A-E]
   +8 ^^     \*
  +10 ^ ^
   0: E*
</pre>
The callout function in <b>pcretest</b> returns zero (carry on matching) by
default, but you can use a \C item in a data line (as described above) to
change this.
</P>
<P>
Inserting callouts can be helpful when using <b>pcretest</b> to check
complicated regular expressions. For further information about callouts, see
the
<a href="pcrecallout.html"><b>pcrecallout</b></a>
documentation.
</P>
<br><a name="SEC11" href="#TOC1">NON-PRINTING CHARACTERS</a><br>
<P>
When <b>pcretest</b> is outputting text in the compiled version of a pattern,
bytes other than 32-126 are always treated as non-printing characters and are
therefore shown as hex escapes.
</P>
<P>
When <b>pcretest</b> is outputting text that is a matched part of a subject
string, it behaves in the same way, unless a different locale has been set for
the pattern (using the <b>/L</b> modifier). In this case, the <b>isprint()</b>
function is used to distinguish printing and non-printing characters.
</P>
<br><a name="SEC12" href="#TOC1">SAVING AND RELOADING COMPILED PATTERNS</a><br>
<P>
The facilities described in this section are not available when the POSIX
interface to PCRE is being used, that is, when the <b>/P</b> pattern modifier is
specified.
</P>
<P>
When the POSIX interface is not in use, you can cause <b>pcretest</b> to write a
compiled pattern to a file, by following the modifiers with &#62; and a file name.
For example:
<pre>
  /pattern/im &#62;/some/file
</pre>
See the
<a href="pcreprecompile.html"><b>pcreprecompile</b></a>
documentation for a discussion about saving and re-using compiled patterns.
</P>
<P>
The data that is written is binary. The first eight bytes are the length of the
compiled pattern data followed by the length of the optional study data, each
written as four bytes in big-endian order (most significant byte first). If
there is no study data (either the pattern was not studied, or studying did not
return any data), the second length is zero. The lengths are followed by an
exact copy of the compiled pattern. If there is additional study data, this
follows immediately after the compiled pattern. After writing the file,
<b>pcretest</b> expects to read a new pattern.
</P>
<P>
A saved pattern can be reloaded into <b>pcretest</b> by specifying &#60; and a file
name instead of a pattern. The name of the file must not contain a &#60; character,
as otherwise <b>pcretest</b> will interpret the line as a pattern delimited by &#60;
characters.
For example:
<pre>
   re&#62; &#60;/some/file
  Compiled regex loaded from /some/file
  No study data
</pre>
When the pattern has been loaded, <b>pcretest</b> proceeds to read data lines in
the usual way.
</P>
<P>
You can copy a file written by <b>pcretest</b> to a different host and reload it
there, even if the new host has opposite endianness to the one on which the
pattern was compiled. For example, you can compile on an i86 machine and run on
a SPARC machine.
</P>
<P>
File names for saving and reloading can be absolute or relative, but note that
the shell facility of expanding a file name that starts with a tilde (~) is not
available.
</P>
<P>
The ability to save and reload files in <b>pcretest</b> is intended for testing
and experimentation. It is not intended for production use because only a
single pattern can be written to a file. Furthermore, there is no facility for
supplying custom character tables for use with a reloaded pattern. If the
original pattern was compiled with custom tables, an attempt to match a subject
string using a reloaded pattern is likely to cause <b>pcretest</b> to crash.
Finally, if you attempt to load a file that is not in the correct format, the
result is undefined.
</P>
<br><a name="SEC13" href="#TOC1">SEE ALSO</a><br>
<P>
<b>pcre</b>(3), <b>pcreapi</b>(3), <b>pcrecallout</b>(3), <b>pcrematching</b>(3),
<b>pcrepartial</b>(3), <b>pcrepattern</b>(3), <b>pcreprecompile</b>(3).
</P>
<br><a name="SEC14" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<br><a name="SEC15" href="#TOC1">REVISION</a><br>
<P>
Last updated: 26 September 2009
<br>
Copyright &copy; 1997-2009 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre_free_substring.html000064400000002141150403561520020145 0ustar00<html>
<head>
<title>pcre_free_substring specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_free_substring man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>void pcre_free_substring(const char *<i>stringptr</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This is a convenience function for freeing the store obtained by a previous
call to <b>pcre_get_substring()</b> or <b>pcre_get_named_substring()</b>. Its
only argument is a pointer to the string.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcreposix.html000064400000027017150403561520016140 0ustar00<html>
<head>
<title>pcreposix specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcreposix man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<ul>
<li><a name="TOC1" href="#SEC1">SYNOPSIS OF POSIX API</a>
<li><a name="TOC2" href="#SEC2">DESCRIPTION</a>
<li><a name="TOC3" href="#SEC3">COMPILING A PATTERN</a>
<li><a name="TOC4" href="#SEC4">MATCHING NEWLINE CHARACTERS</a>
<li><a name="TOC5" href="#SEC5">MATCHING A PATTERN</a>
<li><a name="TOC6" href="#SEC6">ERROR MESSAGES</a>
<li><a name="TOC7" href="#SEC7">MEMORY USAGE</a>
<li><a name="TOC8" href="#SEC8">AUTHOR</a>
<li><a name="TOC9" href="#SEC9">REVISION</a>
</ul>
<br><a name="SEC1" href="#TOC1">SYNOPSIS OF POSIX API</a><br>
<P>
<b>#include &#60;pcreposix.h&#62;</b>
</P>
<P>
<b>int regcomp(regex_t *<i>preg</i>, const char *<i>pattern</i>,</b>
<b>int <i>cflags</i>);</b>
</P>
<P>
<b>int regexec(regex_t *<i>preg</i>, const char *<i>string</i>,</b>
<b>size_t <i>nmatch</i>, regmatch_t <i>pmatch</i>[], int <i>eflags</i>);</b>
</P>
<P>
<b>size_t regerror(int <i>errcode</i>, const regex_t *<i>preg</i>,</b>
<b>char *<i>errbuf</i>, size_t <i>errbuf_size</i>);</b>
</P>
<P>
<b>void regfree(regex_t *<i>preg</i>);</b>
</P>
<br><a name="SEC2" href="#TOC1">DESCRIPTION</a><br>
<P>
This set of functions provides a POSIX-style API to the PCRE regular expression
package. See the
<a href="pcreapi.html"><b>pcreapi</b></a>
documentation for a description of PCRE's native API, which contains much
additional functionality.
</P>
<P>
The functions described here are just wrapper functions that ultimately call
the PCRE native API. Their prototypes are defined in the <b>pcreposix.h</b>
header file, and on Unix systems the library itself is called
<b>pcreposix.a</b>, so can be accessed by adding <b>-lpcreposix</b> to the
command for linking an application that uses them. Because the POSIX functions
call the native ones, it is also necessary to add <b>-lpcre</b>.
</P>
<P>
I have implemented only those POSIX option bits that can be reasonably mapped
to PCRE native options. In addition, the option REG_EXTENDED is defined with
the value zero. This has no effect, but since programs that are written to the
POSIX interface often use it, this makes it easier to slot in PCRE as a
replacement library. Other POSIX options are not even defined.
</P>
<P>
There are also some other options that are not defined by POSIX. These have
been added at the request of users who want to make use of certain
PCRE-specific features via the POSIX calling interface.
</P>
<P>
When PCRE is called via these functions, it is only the API that is POSIX-like
in style. The syntax and semantics of the regular expressions themselves are
still those of Perl, subject to the setting of various PCRE options, as
described below. "POSIX-like in style" means that the API approximates to the
POSIX definition; it is not fully POSIX-compatible, and in multi-byte encoding
domains it is probably even less compatible.
</P>
<P>
The header for these functions is supplied as <b>pcreposix.h</b> to avoid any
potential clash with other POSIX libraries. It can, of course, be renamed or
aliased as <b>regex.h</b>, which is the "correct" name. It provides two
structure types, <i>regex_t</i> for compiled internal forms, and
<i>regmatch_t</i> for returning captured substrings. It also defines some
constants whose names start with "REG_"; these are used for setting options and
identifying error codes.
</P>
<P>
</P>
<br><a name="SEC3" href="#TOC1">COMPILING A PATTERN</a><br>
<P>
The function <b>regcomp()</b> is called to compile a pattern into an
internal form. The pattern is a C string terminated by a binary zero, and
is passed in the argument <i>pattern</i>. The <i>preg</i> argument is a pointer
to a <b>regex_t</b> structure that is used as a base for storing information
about the compiled regular expression.
</P>
<P>
The argument <i>cflags</i> is either zero, or contains one or more of the bits
defined by the following macros:
<pre>
  REG_DOTALL
</pre>
The PCRE_DOTALL option is set when the regular expression is passed for
compilation to the native function. Note that REG_DOTALL is not part of the
POSIX standard.
<pre>
  REG_ICASE
</pre>
The PCRE_CASELESS option is set when the regular expression is passed for
compilation to the native function.
<pre>
  REG_NEWLINE
</pre>
The PCRE_MULTILINE option is set when the regular expression is passed for
compilation to the native function. Note that this does <i>not</i> mimic the
defined POSIX behaviour for REG_NEWLINE (see the following section).
<pre>
  REG_NOSUB
</pre>
The PCRE_NO_AUTO_CAPTURE option is set when the regular expression is passed
for compilation to the native function. In addition, when a pattern that is
compiled with this flag is passed to <b>regexec()</b> for matching, the
<i>nmatch</i> and <i>pmatch</i> arguments are ignored, and no captured strings
are returned.
<pre>
  REG_UNGREEDY
</pre>
The PCRE_UNGREEDY option is set when the regular expression is passed for
compilation to the native function. Note that REG_UNGREEDY is not part of the
POSIX standard.
<pre>
  REG_UTF8
</pre>
The PCRE_UTF8 option is set when the regular expression is passed for
compilation to the native function. This causes the pattern itself and all data
strings used for matching it to be treated as UTF-8 strings. Note that REG_UTF8
is not part of the POSIX standard.
</P>
<P>
In the absence of these flags, no options are passed to the native function.
This means that the regex is compiled with PCRE default semantics. In
particular, the way it handles newline characters in the subject string is the
Perl way, not the POSIX way. Note that setting PCRE_MULTILINE has only
<i>some</i> of the effects specified for REG_NEWLINE. It does not affect the way
newlines are matched by . (they are not) or by a negative class such as [^a]
(they are).
</P>
<P>
The yield of <b>regcomp()</b> is zero on success, and non-zero otherwise. The
<i>preg</i> structure is filled in on success, and one member of the structure
is public: <i>re_nsub</i> contains the number of capturing subpatterns in
the regular expression. Various error codes are defined in the header file.
</P>
<P>
NOTE: If the yield of <b>regcomp()</b> is non-zero, you must not attempt to
use the contents of the <i>preg</i> structure. If, for example, you pass it to
<b>regexec()</b>, the result is undefined and your program is likely to crash.
</P>
<br><a name="SEC4" href="#TOC1">MATCHING NEWLINE CHARACTERS</a><br>
<P>
This area is not simple, because POSIX and Perl take different views of things.
It is not possible to get PCRE to obey POSIX semantics, but then PCRE was never
intended to be a POSIX engine. The following table lists the different
possibilities for matching newline characters in PCRE:
<pre>
                          Default   Change with

  . matches newline          no     PCRE_DOTALL
  newline matches [^a]       yes    not changeable
  $ matches \n at end        yes    PCRE_DOLLAR_ENDONLY
  $ matches \n in middle     no     PCRE_MULTILINE
  ^ matches \n in middle     no     PCRE_MULTILINE
</pre>
This is the equivalent table for POSIX:
<pre>
                          Default   Change with

  . matches newline          yes    REG_NEWLINE
  newline matches [^a]       yes    REG_NEWLINE
  $ matches \n at end        no     REG_NEWLINE
  $ matches \n in middle     no     REG_NEWLINE
  ^ matches \n in middle     no     REG_NEWLINE
</pre>
PCRE's behaviour is the same as Perl's, except that there is no equivalent for
PCRE_DOLLAR_ENDONLY in Perl. In both PCRE and Perl, there is no way to stop
newline from matching [^a].
</P>
<P>
The default POSIX newline handling can be obtained by setting PCRE_DOTALL and
PCRE_DOLLAR_ENDONLY, but there is no way to make PCRE behave exactly as for the
REG_NEWLINE action.
</P>
<br><a name="SEC5" href="#TOC1">MATCHING A PATTERN</a><br>
<P>
The function <b>regexec()</b> is called to match a compiled pattern <i>preg</i>
against a given <i>string</i>, which is by default terminated by a zero byte
(but see REG_STARTEND below), subject to the options in <i>eflags</i>. These can
be:
<pre>
  REG_NOTBOL
</pre>
The PCRE_NOTBOL option is set when calling the underlying PCRE matching
function.
<pre>
  REG_NOTEMPTY
</pre>
The PCRE_NOTEMPTY option is set when calling the underlying PCRE matching
function. Note that REG_NOTEMPTY is not part of the POSIX standard. However,
setting this option can give more POSIX-like behaviour in some situations.
<pre>
  REG_NOTEOL
</pre>
The PCRE_NOTEOL option is set when calling the underlying PCRE matching
function.
<pre>
  REG_STARTEND
</pre>
The string is considered to start at <i>string</i> + <i>pmatch[0].rm_so</i> and
to have a terminating NUL located at <i>string</i> + <i>pmatch[0].rm_eo</i>
(there need not actually be a NUL at that location), regardless of the value of
<i>nmatch</i>. This is a BSD extension, compatible with but not specified by
IEEE Standard 1003.2 (POSIX.2), and should be used with caution in software
intended to be portable to other systems. Note that a non-zero <i>rm_so</i> does
not imply REG_NOTBOL; REG_STARTEND affects only the location of the string, not
how it is matched.
</P>
<P>
If the pattern was compiled with the REG_NOSUB flag, no data about any matched
strings is returned. The <i>nmatch</i> and <i>pmatch</i> arguments of
<b>regexec()</b> are ignored.
</P>
<P>
If the value of <i>nmatch</i> is zero, or if the value of <i>pmatch</i> is NULL,
no data about any matched strings is returned.
</P>
<P>
Otherwise, the portion of the string that was matched, and also any captured
substrings, are returned via the <i>pmatch</i> argument, which points to an
array of <i>nmatch</i> structures of type <i>regmatch_t</i>, containing the
members <i>rm_so</i> and <i>rm_eo</i>. These contain the offset to the first
character of each substring and the offset to the first character after the end
of each substring, respectively. The 0th element of the vector relates to the
entire portion of <i>string</i> that was matched; subsequent elements relate to
the capturing subpatterns of the regular expression. Unused entries in the
array have both structure members set to -1.
</P>
<P>
A successful match yields a zero return; various error codes are defined in the
header file, of which REG_NOMATCH is the "expected" failure code.
</P>
<br><a name="SEC6" href="#TOC1">ERROR MESSAGES</a><br>
<P>
The <b>regerror()</b> function maps a non-zero errorcode from either
<b>regcomp()</b> or <b>regexec()</b> to a printable message. If <i>preg</i> is not
NULL, the error should have arisen from the use of that structure. A message
terminated by a binary zero is placed in <i>errbuf</i>. The length of the
message, including the zero, is limited to <i>errbuf_size</i>. The yield of the
function is the size of buffer needed to hold the whole message.
</P>
<br><a name="SEC7" href="#TOC1">MEMORY USAGE</a><br>
<P>
Compiling a regular expression causes memory to be allocated and associated
with the <i>preg</i> structure. The function <b>regfree()</b> frees all such
memory, after which <i>preg</i> may no longer be used as a compiled expression.
</P>
<br><a name="SEC8" href="#TOC1">AUTHOR</a><br>
<P>
Philip Hazel
<br>
University Computing Service
<br>
Cambridge CB2 3QH, England.
<br>
</P>
<br><a name="SEC9" href="#TOC1">REVISION</a><br>
<P>
Last updated: 02 September 2009
<br>
Copyright &copy; 1997-2009 University of Cambridge.
<br>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/html/pcre_refcount.html000064400000002410150403561520016750 0ustar00<html>
<head>
<title>pcre_refcount specification</title>
</head>
<body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
<h1>pcre_refcount man page</h1>
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
<p>
This page is part of the PCRE HTML documentation. It was generated automatically
from the original man page. If there is any nonsense in it, please consult the
man page, in case the conversion went wrong.
<br>
<br><b>
SYNOPSIS
</b><br>
<P>
<b>#include &#60;pcre.h&#62;</b>
</P>
<P>
<b>int pcre_refcount(pcre *<i>code</i>, int <i>adjust</i>);</b>
</P>
<br><b>
DESCRIPTION
</b><br>
<P>
This function is used to maintain a reference count inside a data block that
contains a compiled pattern. Its arguments are:
<pre>
  <i>code</i>                      Compiled regular expression
  <i>adjust</i>                    Adjustment to reference value
</pre>
The yield of the function is the adjusted reference value, which is constrained
to lie between 0 and 65535.
</P>
<P>
There is a complete description of the PCRE native API in the
<a href="pcreapi.html"><b>pcreapi</b></a>
page and a description of the POSIX API in the
<a href="pcreposix.html"><b>pcreposix</b></a>
page.
<p>
Return to the <a href="index.html">PCRE index page</a>.
</p>
usr/share/doc/alt-pcre802-devel/HACKING000064400000043522150403561530013261 0ustar00Technical Notes about PCRE
--------------------------

These are very rough technical notes that record potentially useful information 
about PCRE internals.

Historical note 1
-----------------

Many years ago I implemented some regular expression functions to an algorithm
suggested by Martin Richards. These were not Unix-like in form, and were quite
restricted in what they could do by comparison with Perl. The interesting part
about the algorithm was that the amount of space required to hold the compiled
form of an expression was known in advance. The code to apply an expression did
not operate by backtracking, as the original Henry Spencer code and current
Perl code does, but instead checked all possibilities simultaneously by keeping
a list of current states and checking all of them as it advanced through the
subject string. In the terminology of Jeffrey Friedl's book, it was a "DFA
algorithm", though it was not a traditional Finite State Machine (FSM). When
the pattern was all used up, all remaining states were possible matches, and
the one matching the longest subset of the subject string was chosen. This did
not necessarily maximize the individual wild portions of the pattern, as is
expected in Unix and Perl-style regular expressions.

Historical note 2
-----------------

By contrast, the code originally written by Henry Spencer (which was
subsequently heavily modified for Perl) compiles the expression twice: once in
a dummy mode in order to find out how much store will be needed, and then for
real. (The Perl version probably doesn't do this any more; I'm talking about
the original library.) The execution function operates by backtracking and
maximizing (or, optionally, minimizing in Perl) the amount of the subject that
matches individual wild portions of the pattern. This is an "NFA algorithm" in
Friedl's terminology.

OK, here's the real stuff
-------------------------

For the set of functions that form the "basic" PCRE library (which are
unrelated to those mentioned above), I tried at first to invent an algorithm
that used an amount of store bounded by a multiple of the number of characters
in the pattern, to save on compiling time. However, because of the greater
complexity in Perl regular expressions, I couldn't do this. In any case, a
first pass through the pattern is helpful for other reasons. 

Computing the memory requirement: how it was
--------------------------------------------

Up to and including release 6.7, PCRE worked by running a very degenerate first
pass to calculate a maximum store size, and then a second pass to do the real
compile - which might use a bit less than the predicted amount of memory. The
idea was that this would turn out faster than the Henry Spencer code because
the first pass is degenerate and the second pass can just store stuff straight
into the vector, which it knows is big enough.

Computing the memory requirement: how it is
-------------------------------------------

By the time I was working on a potential 6.8 release, the degenerate first pass
had become very complicated and hard to maintain. Indeed one of the early
things I did for 6.8 was to fix Yet Another Bug in the memory computation. Then
I had a flash of inspiration as to how I could run the real compile function in
a "fake" mode that enables it to compute how much memory it would need, while
actually only ever using a few hundred bytes of working memory, and without too
many tests of the mode that might slow it down. So I re-factored the compiling
functions to work this way. This got rid of about 600 lines of source. It
should make future maintenance and development easier. As this was such a major 
change, I never released 6.8, instead upping the number to 7.0 (other quite 
major changes were also present in the 7.0 release).

A side effect of this work was that the previous limit of 200 on the nesting
depth of parentheses was removed. However, there is a downside: pcre_compile()
runs more slowly than before (30% or more, depending on the pattern) because it
is doing a full analysis of the pattern. My hope was that this would not be a
big issue, and in the event, nobody has commented on it.

Traditional matching function
-----------------------------

The "traditional", and original, matching function is called pcre_exec(), and 
it implements an NFA algorithm, similar to the original Henry Spencer algorithm 
and the way that Perl works. This is not surprising, since it is intended to be
as compatible with Perl as possible. This is the function most users of PCRE
will use most of the time.

Supplementary matching function
-------------------------------

From PCRE 6.0, there is also a supplementary matching function called 
pcre_dfa_exec(). This implements a DFA matching algorithm that searches 
simultaneously for all possible matches that start at one point in the subject 
string. (Going back to my roots: see Historical Note 1 above.) This function 
interprets the same compiled pattern data as pcre_exec(); however, not all the
facilities are available, and those that are do not always work in quite the 
same way. See the user documentation for details.

The algorithm that is used for pcre_dfa_exec() is not a traditional FSM, 
because it may have a number of states active at one time. More work would be 
needed at compile time to produce a traditional FSM where only one state is 
ever active at once. I believe some other regex matchers work this way.


Format of compiled patterns
---------------------------

The compiled form of a pattern is a vector of bytes, containing items of
variable length. The first byte in an item is an opcode, and the length of the
item is either implicit in the opcode or contained in the data bytes that
follow it. 

In many cases below LINK_SIZE data values are specified for offsets within the 
compiled pattern. The default value for LINK_SIZE is 2, but PCRE can be
compiled to use 3-byte or 4-byte values for these offsets (impairing the
performance). This is necessary only when patterns whose compiled length is
greater than 64K are going to be processed. In this description, we assume the
"normal" compilation options. Data values that are counts (e.g. for
quantifiers) are always just two bytes long.

A list of the opcodes follows:


Opcodes with no following data
------------------------------

These items are all just one byte long

  OP_END                 end of pattern
  OP_ANY                 match any one character other than newline
  OP_ALLANY              match any one character, including newline
  OP_ANYBYTE             match any single byte, even in UTF-8 mode
  OP_SOD                 match start of data: \A
  OP_SOM                 start of match (subject + offset): \G
  OP_SET_SOM             set start of match (\K)
  OP_CIRC                ^ (start of data, or after \n in multiline)
  OP_NOT_WORD_BOUNDARY   \B
  OP_WORD_BOUNDARY       \b
  OP_NOT_DIGIT           \D
  OP_DIGIT               \d
  OP_NOT_HSPACE          \H
  OP_HSPACE              \h  
  OP_NOT_WHITESPACE      \S
  OP_WHITESPACE          \s
  OP_NOT_VSPACE          \V
  OP_VSPACE              \v  
  OP_NOT_WORDCHAR        \W
  OP_WORDCHAR            \w
  OP_EODN                match end of data or \n at end: \Z
  OP_EOD                 match end of data: \z
  OP_DOLL                $ (end of data, or before \n in multiline)
  OP_EXTUNI              match an extended Unicode character 
  OP_ANYNL               match any Unicode newline sequence 
  
  OP_ACCEPT              ) These are Perl 5.10's "backtracking    
  OP_COMMIT              ) control verbs". If OP_ACCEPT is inside
  OP_FAIL                ) capturing parentheses, it may be preceded 
  OP_PRUNE               ) by one or more OP_CLOSE, followed by a 2-byte 
  OP_SKIP                ) number, indicating which parentheses must be
  OP_THEN                ) closed.
  

Repeating single characters
---------------------------

The common repeats (*, +, ?) when applied to a single character use the
following opcodes:

  OP_STAR
  OP_MINSTAR
  OP_POSSTAR 
  OP_PLUS
  OP_MINPLUS
  OP_POSPLUS 
  OP_QUERY
  OP_MINQUERY
  OP_POSQUERY 

In ASCII mode, these are two-byte items; in UTF-8 mode, the length is variable.
Those with "MIN" in their name are the minimizing versions. Those with "POS" in 
their names are possessive versions. Each is followed by the character that is
to be repeated. Other repeats make use of

  OP_UPTO
  OP_MINUPTO
  OP_POSUPTO 
  OP_EXACT

which are followed by a two-byte count (most significant first) and the
repeated character. OP_UPTO matches from 0 to the given number. A repeat with a
non-zero minimum and a fixed maximum is coded as an OP_EXACT followed by an
OP_UPTO (or OP_MINUPTO or OP_POSUPTO).


Repeating character types
-------------------------

Repeats of things like \d are done exactly as for single characters, except
that instead of a character, the opcode for the type is stored in the data
byte. The opcodes are:

  OP_TYPESTAR
  OP_TYPEMINSTAR
  OP_TYPEPOSSTAR 
  OP_TYPEPLUS
  OP_TYPEMINPLUS
  OP_TYPEPOSPLUS 
  OP_TYPEQUERY
  OP_TYPEMINQUERY
  OP_TYPEPOSQUERY 
  OP_TYPEUPTO
  OP_TYPEMINUPTO
  OP_TYPEPOSUPTO 
  OP_TYPEEXACT


Match by Unicode property
-------------------------

OP_PROP and OP_NOTPROP are used for positive and negative matches of a 
character by testing its Unicode property (the \p and \P escape sequences).
Each is followed by two bytes that encode the desired property as a type and a 
value.

Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by 
three bytes: OP_PROP or OP_NOTPROP and then the desired property type and 
value.


Matching literal characters
---------------------------

The OP_CHAR opcode is followed by a single character that is to be matched 
casefully. For caseless matching, OP_CHARNC is used. In UTF-8 mode, the 
character may be more than one byte long. (Earlier versions of PCRE used 
multi-character strings, but this was changed to allow some new features to be 
added.)


Character classes
-----------------

If there is only one character, OP_CHAR or OP_CHARNC is used for a positive
class, and OP_NOT for a negative one (that is, for something like [^a]).
However, in UTF-8 mode, the use of OP_NOT applies only to characters with
values < 128, because OP_NOT is confined to single bytes.

Another set of repeating opcodes (OP_NOTSTAR etc.) are used for a repeated,
negated, single-character class. The normal ones (OP_STAR etc.) are used for a
repeated positive single-character class.

When there's more than one character in a class and all the characters are less
than 256, OP_CLASS is used for a positive class, and OP_NCLASS for a negative
one. In either case, the opcode is followed by a 32-byte bit map containing a 1
bit for every character that is acceptable. The bits are counted from the least
significant end of each byte.

The reason for having both OP_CLASS and OP_NCLASS is so that, in UTF-8 mode,
subject characters with values greater than 255 can be handled correctly. For
OP_CLASS they don't match, whereas for OP_NCLASS they do.

For classes containing characters with values > 255, OP_XCLASS is used. It
optionally uses a bit map (if any characters lie within it), followed by a list
of pairs and single characters. There is a flag character that indicates
whether it's a positive or a negative class.


Back references
---------------

OP_REF is followed by two bytes containing the reference number.


Repeating character classes and back references
-----------------------------------------------

Single-character classes are handled specially (see above). This section
applies to OP_CLASS and OP_REF. In both cases, the repeat information follows
the base item. The matching code looks at the following opcode to see if it is
one of

  OP_CRSTAR
  OP_CRMINSTAR
  OP_CRPLUS
  OP_CRMINPLUS
  OP_CRQUERY
  OP_CRMINQUERY
  OP_CRRANGE
  OP_CRMINRANGE

All but the last two are just single-byte items. The others are followed by
four bytes of data, comprising the minimum and maximum repeat counts. There are 
no special possessive opcodes for these repeats; a possessive repeat is 
compiled into an atomic group.


Brackets and alternation
------------------------

A pair of non-capturing (round) brackets is wrapped round each expression at
compile time, so alternation always happens in the context of brackets.

[Note for North Americans: "bracket" to some English speakers, including
myself, can be round, square, curly, or pointy. Hence this usage.]

Non-capturing brackets use the opcode OP_BRA. Originally PCRE was limited to 99
capturing brackets and it used a different opcode for each one. From release
3.5, the limit was removed by putting the bracket number into the data for
higher-numbered brackets. From release 7.0 all capturing brackets are handled
this way, using the single opcode OP_CBRA.

A bracket opcode is followed by LINK_SIZE bytes which give the offset to the
next alternative OP_ALT or, if there aren't any branches, to the matching
OP_KET opcode. Each OP_ALT is followed by LINK_SIZE bytes giving the offset to
the next one, or to the OP_KET opcode. For capturing brackets, the bracket 
number immediately follows the offset, always as a 2-byte item.

OP_KET is used for subpatterns that do not repeat indefinitely, while
OP_KETRMIN and OP_KETRMAX are used for indefinite repetitions, minimally or
maximally respectively. All three are followed by LINK_SIZE bytes giving (as a
positive number) the offset back to the matching bracket opcode.

If a subpattern is quantified such that it is permitted to match zero times, it
is preceded by one of OP_BRAZERO, OP_BRAMINZERO, or OP_SKIPZERO. These are
single-byte opcodes that tell the matcher that skipping the following
subpattern entirely is a valid branch. In the case of the first two, not 
skipping the pattern is also valid (greedy and non-greedy). The third is used 
when a pattern has the quantifier {0,0}. It cannot be entirely discarded, 
because it may be called as a subroutine from elsewhere in the regex.

A subpattern with an indefinite maximum repetition is replicated in the
compiled data its minimum number of times (or once with OP_BRAZERO if the
minimum is zero), with the final copy terminating with OP_KETRMIN or OP_KETRMAX
as appropriate.

A subpattern with a bounded maximum repetition is replicated in a nested
fashion up to the maximum number of times, with OP_BRAZERO or OP_BRAMINZERO
before each replication after the minimum, so that, for example, (abc){2,5} is
compiled as (abc)(abc)((abc)((abc)(abc)?)?)?, except that each bracketed group 
has the same number.

When a repeated subpattern has an unbounded upper limit, it is checked to see 
whether it could match an empty string. If this is the case, the opcode in the 
final replication is changed to OP_SBRA or OP_SCBRA. This tells the matcher
that it needs to check for matching an empty string when it hits OP_KETRMIN or
OP_KETRMAX, and if so, to break the loop.


Assertions
----------

Forward assertions are just like other subpatterns, but starting with one of
the opcodes OP_ASSERT or OP_ASSERT_NOT. Backward assertions use the opcodes
OP_ASSERTBACK and OP_ASSERTBACK_NOT, and the first opcode inside the assertion
is OP_REVERSE, followed by a two byte count of the number of characters to move
back the pointer in the subject string. When operating in UTF-8 mode, the count
is a character count rather than a byte count. A separate count is present in
each alternative of a lookbehind assertion, allowing them to have different
fixed lengths.


Once-only (atomic) subpatterns
------------------------------

These are also just like other subpatterns, but they start with the opcode
OP_ONCE. The check for matching an empty string in an unbounded repeat is 
handled entirely at runtime, so there is just this one opcode.


Conditional subpatterns
-----------------------

These are like other subpatterns, but they start with the opcode OP_COND, or
OP_SCOND for one that might match an empty string in an unbounded repeat. If
the condition is a back reference, this is stored at the start of the
subpattern using the opcode OP_CREF followed by two bytes containing the
reference number. OP_NCREF is used instead if the reference was generated by 
name (so that the runtime code knows to check for duplicate names).

If the condition is "in recursion" (coded as "(?(R)"), or "in recursion of
group x" (coded as "(?(Rx)"), the group number is stored at the start of the
subpattern using the opcode OP_RREF or OP_NRREF (cf OP_NCREF), and a value of
zero for "the whole pattern". For a DEFINE condition, just the single byte
OP_DEF is used (it has no associated data). Otherwise, a conditional subpattern
always starts with one of the assertions.


Recursion
---------

Recursion either matches the current regex, or some subexpression. The opcode
OP_RECURSE is followed by a value which is the offset to the starting bracket
from the start of the whole pattern. From release 6.5, OP_RECURSE is 
automatically wrapped inside OP_ONCE brackets (because otherwise some patterns 
broke it). OP_RECURSE is also used for "subroutine" calls, even though they 
are not strictly a recursion.


Callout
-------

OP_CALLOUT is followed by one byte of data that holds a callout number in the
range 0 to 254 for manual callouts, or 255 for an automatic callout. In both 
cases there follows a two-byte value giving the offset in the pattern to the
start of the following item, and another two-byte item giving the length of the
next item.


Changing options
----------------

If any of the /i, /m, or /s options are changed within a pattern, an OP_OPT
opcode is compiled, followed by one byte containing the new settings of these
flags. If there are several alternatives, there is an occurrence of OP_OPT at
the start of all those following the first options change, to set appropriate
options for the start of the alternative. Immediately after the end of the
group there is another such item to reset the flags to their previous values. A
change of flag right at the very start of the pattern can be handled entirely
at compile time, and so does not cause anything to be put into the compiled
data.

Philip Hazel
October 2009
usr/share/doc/alt-pcre802-devel/pcre.txt000064400001225521150403561530013766 0ustar00-----------------------------------------------------------------------------
This file contains a concatenation of the PCRE man pages, converted to plain
text format for ease of searching with a text editor, or for use on systems
that do not have a man page processor. The small individual files that give
synopses of each function in the library have not been included. Neither has
the pcredemo program. There are separate text files for the pcregrep and
pcretest commands.
-----------------------------------------------------------------------------


PCRE(3)                                                                PCRE(3)


NAME
       PCRE - Perl-compatible regular expressions


INTRODUCTION

       The  PCRE  library is a set of functions that implement regular expres-
       sion pattern matching using the same syntax and semantics as Perl, with
       just  a few differences. Some features that appeared in Python and PCRE
       before they appeared in Perl are also available using the  Python  syn-
       tax,  there  is  some  support for one or two .NET and Oniguruma syntax
       items, and there is an option for requesting some  minor  changes  that
       give better JavaScript compatibility.

       The  current implementation of PCRE corresponds approximately with Perl
       5.10, including support for UTF-8 encoded strings and  Unicode  general
       category  properties.  However,  UTF-8  and  Unicode  support has to be
       explicitly enabled; it is not the default. The  Unicode  tables  corre-
       spond to Unicode release 5.2.0.

       In  addition to the Perl-compatible matching function, PCRE contains an
       alternative function that matches the same compiled patterns in a  dif-
       ferent way. In certain circumstances, the alternative function has some
       advantages.  For a discussion of the two matching algorithms,  see  the
       pcrematching page.

       PCRE  is  written  in C and released as a C library. A number of people
       have written wrappers and interfaces of various kinds.  In  particular,
       Google  Inc.   have  provided  a comprehensive C++ wrapper. This is now
       included as part of the PCRE distribution. The pcrecpp page has details
       of  this  interface.  Other  people's contributions can be found in the
       Contrib directory at the primary FTP site, which is:

       ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre

       Details of exactly which Perl regular expression features are  and  are
       not supported by PCRE are given in separate documents. See the pcrepat-
       tern and pcrecompat pages. There is a syntax summary in the  pcresyntax
       page.

       Some  features  of  PCRE can be included, excluded, or changed when the
       library is built. The pcre_config() function makes it  possible  for  a
       client  to  discover  which  features are available. The features them-
       selves are described in the pcrebuild page. Documentation about  build-
       ing  PCRE  for various operating systems can be found in the README and
       NON-UNIX-USE files in the source distribution.

       The library contains a number of undocumented  internal  functions  and
       data  tables  that  are  used by more than one of the exported external
       functions, but which are not intended  for  use  by  external  callers.
       Their  names  all begin with "_pcre_", which hopefully will not provoke
       any name clashes. In some environments, it is possible to control which
       external  symbols  are  exported when a shared library is built, and in
       these cases the undocumented symbols are not exported.


USER DOCUMENTATION

       The user documentation for PCRE comprises a number  of  different  sec-
       tions.  In the "man" format, each of these is a separate "man page". In
       the HTML format, each is a separate page, linked from the  index  page.
       In  the  plain  text format, all the sections, except the pcredemo sec-
       tion, are concatenated, for ease of searching. The sections are as fol-
       lows:

         pcre              this document
         pcre-config       show PCRE installation configuration information
         pcreapi           details of PCRE's native C API
         pcrebuild         options for building PCRE
         pcrecallout       details of the callout feature
         pcrecompat        discussion of Perl compatibility
         pcrecpp           details of the C++ wrapper
         pcredemo          a demonstration C program that uses PCRE
         pcregrep          description of the pcregrep command
         pcrematching      discussion of the two matching algorithms
         pcrepartial       details of the partial matching facility
         pcrepattern       syntax and semantics of supported
                             regular expressions
         pcreperform       discussion of performance issues
         pcreposix         the POSIX-compatible C API
         pcreprecompile    details of saving and re-using precompiled patterns
         pcresample        discussion of the pcredemo program
         pcrestack         discussion of stack usage
         pcresyntax        quick syntax reference
         pcretest          description of the pcretest testing command

       In  addition,  in the "man" and HTML formats, there is a short page for
       each C library function, listing its arguments and results.


LIMITATIONS

       There are some size limitations in PCRE but it is hoped that they  will
       never in practice be relevant.

       The  maximum  length of a compiled pattern is 65539 (sic) bytes if PCRE
       is compiled with the default internal linkage size of 2. If you want to
       process  regular  expressions  that are truly enormous, you can compile
       PCRE with an internal linkage size of 3 or 4 (see the  README  file  in
       the  source  distribution and the pcrebuild documentation for details).
       In these cases the limit is substantially larger.  However,  the  speed
       of execution is slower.

       All values in repeating quantifiers must be less than 65536.

       There is no limit to the number of parenthesized subpatterns, but there
       can be no more than 65535 capturing subpatterns.

       The maximum length of name for a named subpattern is 32 characters, and
       the maximum number of named subpatterns is 10000.

       The  maximum  length of a subject string is the largest positive number
       that an integer variable can hold. However, when using the  traditional
       matching function, PCRE uses recursion to handle subpatterns and indef-
       inite repetition.  This means that the available stack space may  limit
       the size of a subject string that can be processed by certain patterns.
       For a discussion of stack issues, see the pcrestack documentation.


UTF-8 AND UNICODE PROPERTY SUPPORT

       From release 3.3, PCRE has  had  some  support  for  character  strings
       encoded  in the UTF-8 format. For release 4.0 this was greatly extended
       to cover most common requirements, and in release 5.0  additional  sup-
       port for Unicode general category properties was added.

       In order to process UTF-8 strings, you must build PCRE to include UTF-8
       support in the code, and, in addition,  you  must  call  pcre_compile()
       with  the  PCRE_UTF8  option  flag,  or the pattern must start with the
       sequence (*UTF8). When either of these is the case,  both  the  pattern
       and  any  subject  strings  that  are matched against it are treated as
       UTF-8 strings instead of strings of 1-byte characters.

       If you compile PCRE with UTF-8 support, but do not use it at run  time,
       the  library will be a bit bigger, but the additional run time overhead
       is limited to testing the PCRE_UTF8 flag occasionally, so should not be
       very big.

       If PCRE is built with Unicode character property support (which implies
       UTF-8 support), the escape sequences \p{..}, \P{..}, and  \X  are  sup-
       ported.  The available properties that can be tested are limited to the
       general category properties such as Lu for an upper case letter  or  Nd
       for  a  decimal number, the Unicode script names such as Arabic or Han,
       and the derived properties Any and L&. A full  list  is  given  in  the
       pcrepattern documentation. Only the short names for properties are sup-
       ported. For example, \p{L} matches a letter. Its Perl synonym,  \p{Let-
       ter},  is  not  supported.   Furthermore,  in Perl, many properties may
       optionally be prefixed by "Is", for compatibility with Perl  5.6.  PCRE
       does not support this.

   Validity of UTF-8 strings

       When  you  set  the  PCRE_UTF8 flag, the strings passed as patterns and
       subjects are (by default) checked for validity on entry to the relevant
       functions. From release 7.3 of PCRE, the check is according to the rules
       of RFC 3629, which are themselves derived from the  Unicode  specifica-
       tion.  Earlier  releases  of PCRE followed the rules of RFC 2279, which
       allows the full range of 31-bit values (0 to 0x7FFFFFFF).  The  current
       check allows only values in the range U+0 to U+10FFFF, excluding U+D800
       to U+DFFF.

       The excluded code points are the "Low Surrogate Area"  of  Unicode,  of
       which  the Unicode Standard says this: "The Low Surrogate Area does not
       contain any  character  assignments,  consequently  no  character  code
       charts or namelists are provided for this area. Surrogates are reserved
       for use with UTF-16 and then must be used in pairs."  The  code  points
       that  are  encoded  by  UTF-16  pairs are available as independent code
       points in the UTF-8 encoding. (In  other  words,  the  whole  surrogate
       thing is a fudge for UTF-16 which unfortunately messes up UTF-8.)

       If  an  invalid  UTF-8  string  is  passed  to  PCRE,  an  error return
       (PCRE_ERROR_BADUTF8) is given. In some situations, you may already know
       that your strings are valid, and therefore want to skip these checks in
       order to improve performance. If you set the PCRE_NO_UTF8_CHECK flag at
       compile  time  or at run time, PCRE assumes that the pattern or subject
       it is given (respectively) contains only valid  UTF-8  codes.  In  this
       case, it does not diagnose an invalid UTF-8 string.

       If  you  pass  an  invalid UTF-8 string when PCRE_NO_UTF8_CHECK is set,
       what happens depends on why the string is invalid. If the  string  con-
       forms to the "old" definition of UTF-8 (RFC 2279), it is processed as a
       string of characters in the range 0  to  0x7FFFFFFF.  In  other  words,
       apart from the initial validity test, PCRE (when in UTF-8 mode) handles
       strings according to the more liberal rules of RFC  2279.  However,  if
       the  string does not even conform to RFC 2279, the result is undefined.
       Your program may crash.

       If you want to process strings  of  values  in  the  full  range  0  to
       0x7FFFFFFF,  encoded in a UTF-8-like manner as per the old RFC, you can
       set PCRE_NO_UTF8_CHECK to bypass the more restrictive test. However, in
       this situation, you will have to apply your own validity check.

   General comments about UTF-8 mode

       1.  An  unbraced  hexadecimal  escape sequence (such as \xb3) matches a
       two-byte UTF-8 character if the value is greater than 127.

       2. Octal numbers up to \777 are recognized, and  match  two-byte  UTF-8
       characters for values greater than \177.

       3.  Repeat quantifiers apply to complete UTF-8 characters, not to indi-
       vidual bytes, for example: \x{100}{3}.

       4. The dot metacharacter matches one UTF-8 character instead of a  sin-
       gle byte.

       5.  The  escape sequence \C can be used to match a single byte in UTF-8
       mode, but its use can lead to some strange effects.  This  facility  is
       not available in the alternative matching function, pcre_dfa_exec().

       6.  The  character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly
       test characters of any code value, but the characters that PCRE  recog-
       nizes  as  digits,  spaces,  or  word characters remain the same set as
       before, all with values less than 256. This remains true even when PCRE
       includes  Unicode  property support, because to do otherwise would slow
       down PCRE in many common cases. If you really want to test for a  wider
       sense  of,  say,  "digit",  you must use Unicode property tests such as
       \p{Nd}. Note that this also applies to \b, because  it  is  defined  in
       terms of \w and \W.

       7.  Similarly,  characters that match the POSIX named character classes
       are all low-valued characters.

       8. However, the Perl 5.10 horizontal and vertical  whitespace  matching
       escapes (\h, \H, \v, and \V) do match all the appropriate Unicode char-
       acters.

       9. Case-insensitive matching applies only to  characters  whose  values
       are  less than 128, unless PCRE is built with Unicode property support.
       Even when Unicode property support is available, PCRE  still  uses  its
       own  character  tables when checking the case of low-valued characters,
       so as not to degrade performance.  The Unicode property information  is
       used only for characters with higher values. Even when Unicode property
       support is available, PCRE supports case-insensitive matching only when
       there  is  a  one-to-one  mapping between a letter's cases. There are a
       small number of many-to-one mappings in Unicode;  these  are  not  sup-
       ported by PCRE.


AUTHOR

       Philip Hazel
       University Computing Service
       Cambridge CB2 3QH, England.

       Putting  an actual email address here seems to have been a spam magnet,
       so I've taken it away. If you want to email me, use  my  two  initials,
       followed by the two digits 10, at the domain cam.ac.uk.


REVISION

       Last updated: 01 March 2010
       Copyright (c) 1997-2010 University of Cambridge.
------------------------------------------------------------------------------


PCREBUILD(3)                                                      PCREBUILD(3)


NAME
       PCRE - Perl-compatible regular expressions


PCRE BUILD-TIME OPTIONS

       This  document  describes  the  optional  features  of PCRE that can be
       selected when the library is compiled. It assumes use of the  configure
       script,  where the optional features are selected or deselected by pro-
       viding options to configure before running the make  command.  However,
       the  same  options  can be selected in both Unix-like and non-Unix-like
       environments using the GUI facility of cmake-gui if you are using CMake
       instead of configure to build PCRE.

       There  is  a  lot more information about building PCRE in non-Unix-like
       environments in the file called NON-UNIX-USE, which is part of the PCRE
       distribution.  You  should consult this file as well as the README file
       if you are building in a non-Unix-like environment.

       The complete list of options for configure (which includes the standard
       ones  such  as  the  selection  of  the  installation directory) can be
       obtained by running

         ./configure --help

       The following sections include  descriptions  of  options  whose  names
       begin with --enable or --disable. These settings specify changes to the
       defaults for the configure command. Because of the way  that  configure
       works,  --enable  and --disable always come in pairs, so the complemen-
       tary option always exists as well, but as it specifies the default,  it
       is not described.


C++ SUPPORT

       By default, the configure script will search for a C++ compiler and C++
       header files. If it finds them, it automatically builds the C++ wrapper
       library for PCRE. You can disable this by adding

         --disable-cpp

       to the configure command.


UTF-8 SUPPORT

       To build PCRE with support for UTF-8 Unicode character strings, add

         --enable-utf8

       to  the  configure  command.  Of  itself, this does not make PCRE treat
       strings as UTF-8. As well as compiling PCRE with this option, you  also
       have to set the PCRE_UTF8 option when you call  the  pcre_compile()
       or pcre_compile2() functions.

       If you set --enable-utf8 when compiling in an EBCDIC environment,  PCRE
       expects its input to be either ASCII or UTF-8 (depending on the runtime
       option). It is not possible to support both EBCDIC and UTF-8  codes  in
       the  same  version  of  the  library.  Consequently,  --enable-utf8 and
       --enable-ebcdic are mutually exclusive.


UNICODE CHARACTER PROPERTY SUPPORT

       UTF-8 support allows PCRE to process character values greater than  255
       in  the  strings that it handles. On its own, however, it does not pro-
       vide any facilities for accessing the properties of such characters. If
       you  want  to  be able to use the pattern escapes \P, \p, and \X, which
       refer to Unicode character properties, you must add

         --enable-unicode-properties

       to the configure command. This implies UTF-8 support, even if you  have
       not explicitly requested it.

       Including  Unicode  property  support  adds around 30K of tables to the
       PCRE library. Only the general category properties such as  Lu  and  Nd
       are supported. Details are given in the pcrepattern documentation.


CODE VALUE OF NEWLINE

       By  default,  PCRE interprets the linefeed (LF) character as indicating
       the end of a line. This is the normal newline  character  on  Unix-like
       systems.  You  can compile PCRE to use carriage return (CR) instead, by
       adding

         --enable-newline-is-cr

       to the  configure  command.  There  is  also  a  --enable-newline-is-lf
       option, which explicitly specifies linefeed as the newline character.

       Alternatively, you can specify that line endings are to be indicated by
       the two character sequence CRLF. If you want this, add

         --enable-newline-is-crlf

       to the configure command. There is a fourth option, specified by

         --enable-newline-is-anycrlf

       which causes PCRE to recognize any of the three sequences  CR,  LF,  or
       CRLF as indicating a line ending. Finally, a fifth option, specified by

         --enable-newline-is-any

       causes PCRE to recognize any Unicode newline sequence.

       Whatever  line  ending convention is selected when PCRE is built can be
       overridden when the library functions are called. At build time  it  is
       conventional to use the standard for your operating system.


WHAT \R MATCHES

       By  default,  the  sequence \R in a pattern matches any Unicode newline
       sequence, whatever has been selected as the line  ending  sequence.  If
       you specify

         --enable-bsr-anycrlf

       the  default  is changed so that \R matches only CR, LF, or CRLF. What-
       ever is selected when PCRE is built can be overridden when the  library
       functions are called.


BUILDING SHARED AND STATIC LIBRARIES

       The  PCRE building process uses libtool to build both shared and static
       Unix libraries by default. You can suppress one of these by adding  one
       of

         --disable-shared
         --disable-static

       to the configure command, as required.


POSIX MALLOC USAGE

       When PCRE is called through the POSIX interface (see the pcreposix doc-
       umentation), additional working storage is  required  for  holding  the
       pointers  to capturing substrings, because PCRE requires three integers
       per substring, whereas the POSIX interface provides only  two.  If  the
       number of expected substrings is small, the wrapper function uses space
       on the stack, because this is faster than using malloc() for each call.
       The default threshold above which the stack is no longer used is 10; it
       can be changed by adding a setting such as

         --with-posix-malloc-threshold=20

       to the configure command.


HANDLING VERY LARGE PATTERNS

       Within a compiled pattern, offset values are used  to  point  from  one
       part  to another (for example, from an opening parenthesis to an alter-
       nation metacharacter). By default, two-byte values are used  for  these
       offsets,  leading  to  a  maximum size for a compiled pattern of around
       64K. This is sufficient to handle all but the most  gigantic  patterns.
       Nevertheless,  some  people do want to process truly enormous patterns,
       so it is possible to compile PCRE to use three-byte or  four-byte  off-
       sets by adding a setting such as

         --with-link-size=3

       to  the  configure  command.  The value given must be 2, 3, or 4. Using
       longer offsets slows down the operation of PCRE because it has to  load
       additional bytes when handling them.


AVOIDING EXCESSIVE STACK USAGE

       When matching with the pcre_exec() function, PCRE implements backtrack-
       ing by making recursive calls to an internal function  called  match().
       In  environments  where  the size of the stack is limited, this can se-
       verely limit PCRE's operation. (The Unix environment does  not  usually
       suffer from this problem, but it may sometimes be necessary to increase
       the maximum stack size.  There is a discussion in the  pcrestack  docu-
       mentation.)  An alternative approach to recursion that uses memory from
       the heap to remember data, instead of using recursive  function  calls,
       has  been  implemented to work round the problem of limited stack size.
       If you want to build a version of PCRE that works this way, add

         --disable-stack-for-recursion

       to the configure command. With this configuration, PCRE  will  use  the
       pcre_stack_malloc  and pcre_stack_free variables to call memory manage-
       ment functions. By default these point to malloc() and free(), but  you
       can replace the pointers so that your own functions are used instead.

       Separate  functions  are  provided  rather  than  using pcre_malloc and
       pcre_free because the  usage  is  very  predictable:  the  block  sizes
       requested  are  always  the  same,  and  the blocks are always freed in
       reverse order. A calling program might be able to  implement  optimized
       functions  that  perform  better  than  malloc()  and free(). PCRE runs
       noticeably more slowly when built in this way. This option affects only
       the pcre_exec() function; it is not relevant for pcre_dfa_exec().


LIMITING PCRE RESOURCE USAGE

       Internally,  PCRE has a function called match(), which it calls repeat-
       edly  (sometimes  recursively)  when  matching  a  pattern   with   the
       pcre_exec()  function.  By controlling the maximum number of times this
       function may be called during a single matching operation, a limit  can
       be  placed  on  the resources used by a single call to pcre_exec(). The
       limit can be changed at run time, as described in the pcreapi  documen-
       tation.  The default is 10 million, but this can be changed by adding a
       setting such as

         --with-match-limit=500000

       to  the  configure  command.  This  setting  has  no  effect   on   the
       pcre_dfa_exec() matching function.

       In  some  environments  it is desirable to limit the depth of recursive
       calls of match() more strictly than the total number of calls, in order
       to  restrict  the maximum amount of stack (or heap, if --disable-stack-
       for-recursion is specified) that is used. A second limit controls this;
       it  defaults  to  the  value  that is set for --with-match-limit, which
       imposes no additional constraints. However, you can set a  lower  limit
       by adding, for example,

         --with-match-limit-recursion=10000

       to  the  configure  command.  This  value can also be overridden at run
       time.


CREATING CHARACTER TABLES AT BUILD TIME

       PCRE uses fixed tables for processing characters whose code values  are
       less  than 256. By default, PCRE is built with a set of tables that are
       distributed in the file pcre_chartables.c.dist. These  tables  are  for
       ASCII codes only. If you add

         --enable-rebuild-chartables

       to  the  configure  command, the distributed tables are no longer used.
       Instead, a program called dftables is compiled and  run.  This  outputs
       the source for a new set of tables, created in the default locale of your
       C runtime system. (This method of replacing the tables does not work if
       you  are cross compiling, because dftables is run on the local host. If
       you need to create alternative tables when cross  compiling,  you  will
       have to do so "by hand".)


USING EBCDIC CODE

       PCRE  assumes  by  default that it will run in an environment where the
       character code is ASCII (or Unicode, which is  a  superset  of  ASCII).
       This  is  the  case for most computer operating systems. PCRE can, how-
       ever, be compiled to run in an EBCDIC environment by adding

         --enable-ebcdic

       to the configure command. This setting implies --enable-rebuild-charta-
       bles.  You  should  only  use  it if you know that you are in an EBCDIC
       environment (for example,  an  IBM  mainframe  operating  system).  The
       --enable-ebcdic option is incompatible with --enable-utf8.


PCREGREP OPTIONS FOR COMPRESSED FILE SUPPORT

       By default, pcregrep reads all files as plain text. You can build it so
       that it recognizes files whose names end in .gz or .bz2, and reads them
       with libz or libbz2, respectively, by adding one or both of

         --enable-pcregrep-libz
         --enable-pcregrep-libbz2

       to the configure command. These options naturally require that the rel-
       evant libraries are installed on your system. Configuration  will  fail
       if they are not.


PCRETEST OPTION FOR LIBREADLINE SUPPORT

       If you add

         --enable-pcretest-libreadline

       to  the  configure  command,  pcretest  is  linked with the libreadline
       library, and when its input is from a terminal, it reads it  using  the
       readline() function. This provides line-editing and history facilities.
       Note that libreadline is GPL-licensed, so if you distribute a binary of
       pcretest linked in this way, there may be licensing issues.

       Setting  this  option  causes  the -lreadline option to be added to the
       pcretest build. In many operating environments with a  system-installed
       libreadline this is sufficient. However, in some environments (e.g.  if
       an unmodified distribution version of readline is in use),  some  extra
       configuration  may  be necessary. The INSTALL file for libreadline says
       this:

         "Readline uses the termcap functions, but does not link with the
         termcap or curses library itself, allowing applications which link
         with readline the to choose an appropriate library."

       If your environment has not been set up so that an appropriate  library
       is automatically included, you may need to add something like

         LIBS="-lncurses"

       immediately before the configure command.


SEE ALSO

       pcreapi(3), pcre_config(3).


AUTHOR

       Philip Hazel
       University Computing Service
       Cambridge CB2 3QH, England.


REVISION

       Last updated: 29 September 2009
       Copyright (c) 1997-2009 University of Cambridge.
------------------------------------------------------------------------------


PCREMATCHING(3)                                                PCREMATCHING(3)


NAME
       PCRE - Perl-compatible regular expressions


PCRE MATCHING ALGORITHMS

       This document describes the two different algorithms that are available
       in PCRE for matching a compiled regular expression against a given sub-
       ject  string.  The  "standard"  algorithm  is  the  one provided by the
       pcre_exec() function.  This works in the same way  as  Perl's  matching
       function, and provides a Perl-compatible matching operation.

       An  alternative  algorithm is provided by the pcre_dfa_exec() function;
       this operates in a different way, and is not  Perl-compatible.  It  has
       advantages  and disadvantages compared with the standard algorithm, and
       these are described below.

       When there is only one possible way in which a given subject string can
       match  a pattern, the two algorithms give the same answer. A difference
       arises, however, when there are multiple possibilities. For example, if
       the pattern

         ^<.*>

       is matched against the string

         <something> <something else> <something further>

       there are three possible answers. The standard algorithm finds only one
       of them, whereas the alternative algorithm finds all three.


REGULAR EXPRESSIONS AS TREES

       The set of strings that are matched by a regular expression can be rep-
       resented  as  a  tree structure. An unlimited repetition in the pattern
       makes the tree of infinite size, but it is still a tree.  Matching  the
       pattern  to a given subject string (from a given starting point) can be
       thought of as a search of the tree.  There are two  ways  to  search  a
       tree:  depth-first  and  breadth-first, and these correspond to the two
       matching algorithms provided by PCRE.


THE STANDARD MATCHING ALGORITHM

       In the terminology of Jeffrey Friedl's book "Mastering Regular  Expres-
       sions",  the  standard  algorithm  is an "NFA algorithm". It conducts a
       depth-first search of the pattern tree. That is, it  proceeds  along  a
       single path through the tree, checking that the subject matches what is
       required. When there is a mismatch, the algorithm  tries  any  alterna-
       tives  at  the  current point, and if they all fail, it backs up to the
       previous branch point in the  tree,  and  tries  the  next  alternative
       branch  at  that  level.  This often involves backing up (moving to the
       left) in the subject string as well.  The  order  in  which  repetition
       branches  are  tried  is controlled by the greedy or ungreedy nature of
       the quantifier.

       If a leaf node is reached, a matching string has  been  found,  and  at
       that  point the algorithm stops. Thus, if there is more than one possi-
       ble match, this algorithm returns the first one that it finds.  Whether
       this  is the shortest, the longest, or some intermediate length depends
       on the way the greedy and ungreedy repetition quantifiers are specified
       in the pattern.

       Because  it  ends  up  with a single path through the tree, it is rela-
       tively straightforward for this algorithm to keep  track  of  the  sub-
       strings  that  are  matched  by portions of the pattern in parentheses.
       This provides support for capturing parentheses and back references.


THE ALTERNATIVE MATCHING ALGORITHM

       This algorithm conducts a breadth-first search of  the  tree.  Starting
       from  the  first  matching  point  in the subject, it scans the subject
       string from left to right, once, character by character, and as it does
       this,  it remembers all the paths through the tree that represent valid
       matches. In Friedl's terminology, this is a kind  of  "DFA  algorithm",
       though  it is not implemented as a traditional finite state machine (it
       keeps multiple states active simultaneously).

       Although the general principle of this matching algorithm  is  that  it
       scans  the subject string only once, without backtracking, there is one
       exception: when a lookaround assertion is encountered,  the  characters
       following  or  preceding  the  current  point  have to be independently
       inspected.

       The scan continues until either the end of the subject is  reached,  or
       there  are  no more unterminated paths. At this point, terminated paths
       represent the different matching possibilities (if there are none,  the
       match  has  failed).   Thus,  if there is more than one possible match,
       this algorithm finds all of them, and in particular, it finds the long-
       est.  There  is  an  option to stop the algorithm after the first match
       (which is necessarily the shortest) is found.

       Note that all the matches that are found start at the same point in the
       subject. If the pattern

         cat(er(pillar)?)

       is  matched  against the string "the caterpillar catchment", the result
       will be the three strings "cat", "cater", and "caterpillar" that  start
       at the fourth character of the subject. The algorithm does not automat-
       ically move on to find matches that start at later positions.

       There are a number of features of PCRE regular expressions that are not
       supported by the alternative matching algorithm. They are as follows:

       1.  Because  the  algorithm  finds  all possible matches, the greedy or
       ungreedy nature of repetition quantifiers is not relevant.  Greedy  and
       ungreedy quantifiers are treated in exactly the same way. However, pos-
       sessive quantifiers can make a difference when what follows could  also
       match what is quantified, for example in a pattern like this:

         ^a++\w!

       This  pattern matches "aaab!" but not "aaa!", which would be matched by
       a non-possessive quantifier. Similarly, if an atomic group is  present,
       it  is matched as if it were a standalone pattern at the current point,
       and the longest match is then "locked in" for the rest of  the  overall
       pattern.

       2. When dealing with multiple paths through the tree simultaneously, it
       is not straightforward to keep track of  captured  substrings  for  the
       different  matching  possibilities,  and  PCRE's implementation of this
       algorithm does not attempt to do this. This means that no captured sub-
       strings are available.

       3.  Because no substrings are captured, back references within the pat-
       tern are not supported, and cause errors if encountered.

       4. For the same reason, conditional expressions that use  a  backrefer-
       ence  as  the  condition or test for a specific group recursion are not
       supported.

       5. Because many paths through the tree may be  active,  the  \K  escape
       sequence, which resets the start of the match when encountered (but may
       be on some paths and not on others), is not  supported.  It  causes  an
       error if encountered.

       6.  Callouts  are  supported, but the value of the capture_top field is
       always 1, and the value of the capture_last field is always -1.

       7. The \C escape sequence, which (in the standard algorithm) matches  a
       single  byte, even in UTF-8 mode, is not supported because the alterna-
       tive algorithm moves through the subject  string  one  character  at  a
       time, for all active paths through the tree.

       8.  Except for (*FAIL), the backtracking control verbs such as (*PRUNE)
       are not supported. (*FAIL) is supported, and  behaves  like  a  failing
       negative assertion.


ADVANTAGES OF THE ALTERNATIVE ALGORITHM

       Using  the alternative matching algorithm provides the following advan-
       tages:

       1. All possible matches (at a single point in the subject) are automat-
       ically  found,  and  in particular, the longest match is found. To find
       more than one match using the standard algorithm, you have to do kludgy
       things with callouts.

       2.  Because  the  alternative  algorithm  scans the subject string just
       once, and never needs to backtrack, it is possible to  pass  very  long
       subject  strings  to  the matching function in several pieces, checking
       for partial matching each time.  The  pcrepartial  documentation  gives
       details of partial matching.


DISADVANTAGES OF THE ALTERNATIVE ALGORITHM

       The alternative algorithm suffers from a number of disadvantages:

       1.  It  is  substantially  slower  than the standard algorithm. This is
       partly because it has to search for all possible matches, but  is  also
       because it is less susceptible to optimization.

       2. Capturing parentheses and back references are not supported.

       3. Although atomic groups are supported, their use does not provide the
       performance advantage that it does for the standard algorithm.


AUTHOR

       Philip Hazel
       University Computing Service
       Cambridge CB2 3QH, England.


REVISION

       Last updated: 29 September 2009
       Copyright (c) 1997-2009 University of Cambridge.
------------------------------------------------------------------------------


PCREAPI(3)                                                          PCREAPI(3)


NAME
       PCRE - Perl-compatible regular expressions


PCRE NATIVE API

       #include <pcre.h>

       pcre *pcre_compile(const char *pattern, int options,
            const char **errptr, int *erroffset,
            const unsigned char *tableptr);

       pcre *pcre_compile2(const char *pattern, int options,
            int *errorcodeptr,
            const char **errptr, int *erroffset,
            const unsigned char *tableptr);

       pcre_extra *pcre_study(const pcre *code, int options,
            const char **errptr);

       int pcre_exec(const pcre *code, const pcre_extra *extra,
            const char *subject, int length, int startoffset,
            int options, int *ovector, int ovecsize);

       int pcre_dfa_exec(const pcre *code, const pcre_extra *extra,
            const char *subject, int length, int startoffset,
            int options, int *ovector, int ovecsize,
            int *workspace, int wscount);

       int pcre_copy_named_substring(const pcre *code,
            const char *subject, int *ovector,
            int stringcount, const char *stringname,
            char *buffer, int buffersize);

       int pcre_copy_substring(const char *subject, int *ovector,
            int stringcount, int stringnumber, char *buffer,
            int buffersize);

       int pcre_get_named_substring(const pcre *code,
            const char *subject, int *ovector,
            int stringcount, const char *stringname,
            const char **stringptr);

       int pcre_get_stringnumber(const pcre *code,
            const char *name);

       int pcre_get_stringtable_entries(const pcre *code,
            const char *name, char **first, char **last);

       int pcre_get_substring(const char *subject, int *ovector,
            int stringcount, int stringnumber,
            const char **stringptr);

       int pcre_get_substring_list(const char *subject,
            int *ovector, int stringcount, const char ***listptr);

       void pcre_free_substring(const char *stringptr);

       void pcre_free_substring_list(const char **stringptr);

       const unsigned char *pcre_maketables(void);

       int pcre_fullinfo(const pcre *code, const pcre_extra *extra,
            int what, void *where);

       int pcre_info(const pcre *code, int *optptr, int *firstcharptr);

       int pcre_refcount(pcre *code, int adjust);

       int pcre_config(int what, void *where);

       char *pcre_version(void);

       void *(*pcre_malloc)(size_t);

       void (*pcre_free)(void *);

       void *(*pcre_stack_malloc)(size_t);

       void (*pcre_stack_free)(void *);

       int (*pcre_callout)(pcre_callout_block *);


PCRE API OVERVIEW

       PCRE has its own native API, which is described in this document. There
       are also some wrapper functions that correspond to  the  POSIX  regular
       expression  API.  These  are  described in the pcreposix documentation.
       Both of these APIs define a set of C function calls. A C++  wrapper  is
       distributed with PCRE. It is documented in the pcrecpp page.

       The  native  API  C  function prototypes are defined in the header file
       pcre.h, and on Unix systems the library itself is called  libpcre.   It
       can normally be accessed by adding -lpcre to the command for linking an
       application  that  uses  PCRE.  The  header  file  defines  the  macros
       PCRE_MAJOR  and  PCRE_MINOR to contain the major and minor release num-
       bers for the library.  Applications can use these  to  include  support
       for different releases of PCRE.

       The   functions   pcre_compile(),  pcre_compile2(),  pcre_study(),  and
       pcre_exec() are used for compiling and matching regular expressions  in
       a  Perl-compatible  manner. A sample program that demonstrates the sim-
       plest way of using them is provided in the file  called  pcredemo.c  in
       the PCRE source distribution. A listing of this program is given in the
       pcredemo documentation, and the pcresample documentation describes  how
       to compile and run it.

       A second matching function, pcre_dfa_exec(), which is not Perl-compati-
       ble, is also provided. This uses a different algorithm for  the  match-
       ing.  The  alternative algorithm finds all possible matches (at a given
       point in the subject), and scans the subject just  once  (unless  there
       are  lookbehind  assertions).  However,  this algorithm does not return
       captured substrings. A description of the two matching  algorithms  and
       their  advantages  and disadvantages is given in the pcrematching docu-
       mentation.

       In addition to the main compiling and  matching  functions,  there  are
       convenience functions for extracting captured substrings from a subject
       string that is matched by pcre_exec(). They are:

         pcre_copy_substring()
         pcre_copy_named_substring()
         pcre_get_substring()
         pcre_get_named_substring()
         pcre_get_substring_list()
         pcre_get_stringnumber()
         pcre_get_stringtable_entries()

       pcre_free_substring() and pcre_free_substring_list() are also provided,
       to free the memory used for extracted strings.

       The  function  pcre_maketables()  is  used  to build a set of character
       tables  in  the  current  locale   for   passing   to   pcre_compile(),
       pcre_exec(),  or  pcre_dfa_exec(). This is an optional facility that is
       provided for specialist use.  Most  commonly,  no  special  tables  are
       passed,  in  which case internal tables that are generated when PCRE is
       built are used.

       The function pcre_fullinfo() is used to find out  information  about  a
       compiled  pattern; pcre_info() is an obsolete version that returns only
       some of the available information, but is retained for  backwards  com-
       patibility.   The function pcre_version() returns a pointer to a string
       containing the version of PCRE and its date of release.

       The function pcre_refcount() maintains a  reference  count  in  a  data
       block  containing  a compiled pattern. This is provided for the benefit
       of object-oriented applications.

       The global variables pcre_malloc and pcre_free  initially  contain  the
       entry  points  of  the  standard malloc() and free() functions, respec-
       tively. PCRE calls the memory management functions via these variables,
       so  a  calling  program  can replace them if it wishes to intercept the
       calls. This should be done before calling any PCRE functions.

       The global variables pcre_stack_malloc  and  pcre_stack_free  are  also
       indirections  to  memory  management functions. These special functions
       are used only when PCRE is compiled to use  the  heap  for  remembering
       data, instead of recursive function calls, when running the pcre_exec()
       function. See the pcrebuild documentation for  details  of  how  to  do
       this.  It  is  a non-standard way of building PCRE, for use in environ-
       ments that have limited stacks. Because of the greater  use  of  memory
       management,  it  runs  more  slowly. Separate functions are provided so
       that special-purpose external code can be  used  for  this  case.  When
       used,  these  functions  are always called in a stack-like manner (last
       obtained, first freed), and always for memory blocks of the same  size.
       There  is  a discussion about PCRE's stack usage in the pcrestack docu-
       mentation.

       The global variable pcre_callout initially contains NULL. It can be set
       by  the  caller  to  a "callout" function, which PCRE will then call at
       specified points during a matching operation. Details are given in  the
       pcrecallout documentation.


NEWLINES

       PCRE  supports five different conventions for indicating line breaks in
       strings: a single CR (carriage return) character, a  single  LF  (line-
       feed) character, the two-character sequence CRLF, any of the three pre-
       ceding, or any Unicode newline sequence. The Unicode newline  sequences
       are  the  three just mentioned, plus the single characters VT (vertical
       tab, U+000B), FF (formfeed, U+000C), NEL (next line, U+0085), LS  (line
       separator, U+2028), and PS (paragraph separator, U+2029).

       Each  of  the first three conventions is used by at least one operating
       system as its standard newline sequence. When PCRE is built, a  default
       can  be  specified.  The default default is LF, which is the Unix stan-
       dard. When PCRE is run, the default can be overridden,  either  when  a
       pattern is compiled, or when it is matched.

       At compile time, the newline convention can be specified by the options
       argument of pcre_compile(), or it can be specified by special  text  at
       the start of the pattern itself; this overrides any other settings. See
       the pcrepattern page for details of the special character sequences.

       In the PCRE documentation the word "newline" is used to mean "the char-
       acter  or pair of characters that indicate a line break". The choice of
       newline convention affects the handling of  the  dot,  circumflex,  and
       dollar metacharacters, the handling of #-comments in /x mode, and, when
       CRLF is a recognized line ending sequence, the match position  advance-
       ment for a non-anchored pattern. There is more detail about this in the
       section on pcre_exec() options below.

       The choice of newline convention does not affect the interpretation  of
       the  \n  or  \r  escape  sequences, nor does it affect what \R matches,
       which is controlled in a similar way, but by separate options.


MULTITHREADING

       The PCRE functions can be used in  multi-threading  applications,  with
       the  proviso  that  the  memory  management  functions  pointed  to  by
       pcre_malloc, pcre_free, pcre_stack_malloc, and pcre_stack_free, and the
       callout function pointed to by pcre_callout, are shared by all threads.

       The  compiled form of a regular expression is not altered during match-
       ing, so the same compiled pattern can safely be used by several threads
       at once.


SAVING PRECOMPILED PATTERNS FOR LATER USE

       The compiled form of a regular expression can be saved and re-used at a
       later time, possibly by a different program, and even on a  host  other
       than  the  one  on  which  it  was  compiled.  Details are given in the
       pcreprecompile documentation. However, compiling a  regular  expression
       with  one version of PCRE for use with a different version is not guar-
       anteed to work and may cause crashes.


CHECKING BUILD-TIME OPTIONS

       int pcre_config(int what, void *where);

       The function pcre_config() makes it possible for a PCRE client to  dis-
       cover which optional features have been compiled into the PCRE library.
       The pcrebuild documentation has more details about these optional  fea-
       tures.

       The  first  argument  for pcre_config() is an integer, specifying which
       information is required; the second argument is a pointer to a variable
       into  which  the  information  is  placed. The following information is
       available:

         PCRE_CONFIG_UTF8

       The output is an integer that is set to one if UTF-8 support is  avail-
       able; otherwise it is set to zero.

         PCRE_CONFIG_UNICODE_PROPERTIES

       The  output  is  an  integer  that is set to one if support for Unicode
       character properties is available; otherwise it is set to zero.

         PCRE_CONFIG_NEWLINE

       The output is an integer whose value specifies  the  default  character
       sequence  that is recognized as meaning "newline". The five values that
       are supported are: 10 for LF, 13 for CR, 3338 for CRLF, -2 for ANYCRLF,
       and  -1  for  ANY.  Though they are derived from ASCII, the same values
       are returned in EBCDIC environments. The default should normally corre-
       spond to the standard sequence for your operating system.

         PCRE_CONFIG_BSR

       The output is an integer whose value indicates what character sequences
       the \R escape sequence matches by default. A value of 0 means  that  \R
       matches  any  Unicode  line ending sequence; a value of 1 means that \R
       matches only CR, LF, or CRLF. The default can be overridden when a pat-
       tern is compiled or matched.

         PCRE_CONFIG_LINK_SIZE

       The  output  is  an  integer that contains the number of bytes used for
       internal linkage in compiled regular expressions. The value is 2, 3, or
       4.  Larger  values  allow larger regular expressions to be compiled, at
       the expense of slower matching. The default value of  2  is  sufficient
       for  all  but  the  most massive patterns, since it allows the compiled
       pattern to be up to 64K in size.

         PCRE_CONFIG_POSIX_MALLOC_THRESHOLD

       The output is an integer that contains the threshold  above  which  the
       POSIX  interface  uses malloc() for output vectors. Further details are
       given in the pcreposix documentation.

         PCRE_CONFIG_MATCH_LIMIT

       The output is a long integer that gives the default limit for the  num-
       ber  of  internal  matching  function calls in a pcre_exec() execution.
       Further details are given with pcre_exec() below.

         PCRE_CONFIG_MATCH_LIMIT_RECURSION

       The output is a long integer that gives the default limit for the depth
       of   recursion  when  calling  the  internal  matching  function  in  a
       pcre_exec() execution.  Further  details  are  given  with  pcre_exec()
       below.

         PCRE_CONFIG_STACKRECURSE

       The  output is an integer that is set to one if internal recursion when
       running pcre_exec() is implemented by recursive function calls that use
       the  stack  to remember their state. This is the usual way that PCRE is
       compiled. The output is zero if PCRE was compiled to use blocks of data
       on  the  heap  instead  of  recursive  function  calls.  In  this case,
       pcre_stack_malloc and  pcre_stack_free  are  called  to  manage  memory
       blocks on the heap, thus avoiding the use of the stack.


COMPILING A PATTERN

       pcre *pcre_compile(const char *pattern, int options,
            const char **errptr, int *erroffset,
            const unsigned char *tableptr);

       pcre *pcre_compile2(const char *pattern, int options,
            int *errorcodeptr,
            const char **errptr, int *erroffset,
            const unsigned char *tableptr);

       Either of the functions pcre_compile() or pcre_compile2() can be called
       to compile a pattern into an internal form. The only difference between
       the  two interfaces is that pcre_compile2() has an additional argument,
       errorcodeptr, via which a numerical error  code  can  be  returned.  To
       avoid  too  much repetition, we refer just to pcre_compile() below, but
       the information applies equally to pcre_compile2().

       The pattern is a C string terminated by a binary zero, and is passed in
       the  pattern  argument.  A  pointer to a single block of memory that is
       obtained via pcre_malloc is returned. This contains the  compiled  code
       and related data. The pcre type is defined for the returned block; this
       is a typedef for a structure whose contents are not externally defined.
       It is up to the caller to free the memory (via pcre_free) when it is no
       longer required.

       Although the compiled code of a PCRE regex is relocatable, that is,  it
       does not depend on memory location, the complete pcre data block is not
       fully relocatable, because it may contain a copy of the tableptr  argu-
       ment, which is an address (see below).

       The options argument contains various bit settings that affect the com-
       pilation. It should be zero if no options are required.  The  available
       options  are  described  below. Some of them (in particular, those that
       are compatible with Perl, but some others as well) can also be set  and
       unset  from  within  the  pattern  (see the detailed description in the
       pcrepattern documentation). For those options that can be different  in
       different  parts  of  the pattern, the contents of the options argument
       specify their settings at the start of compilation and  execution.  The
       PCRE_ANCHORED, PCRE_BSR_xxx, and PCRE_NEWLINE_xxx options can be set at
       the time of matching as well as at compile time.

       If errptr is NULL, pcre_compile() returns NULL immediately.  Otherwise,
       if  compilation  of  a  pattern fails, pcre_compile() returns NULL, and
       sets the variable pointed to by errptr to point to a textual error mes-
       sage. This is a static string that is part of the library. You must not
       try to free it. The byte offset from the start of the  pattern  to  the
       character  that  was  being  processed when the error was discovered is
       placed in the variable pointed to by erroffset, which must not be NULL.
       If  it  is,  an  immediate error is given. Some errors are not detected
       until checks are carried out when the whole pattern has  been  scanned;
       in this case the offset is set to the end of the pattern.

       If  pcre_compile2()  is  used instead of pcre_compile(), and the error-
       codeptr argument is not NULL, a non-zero error code number is  returned
       via  this argument in the event of an error. This is in addition to the
       textual error message. Error codes and messages are listed below.

       If the final argument, tableptr, is NULL, PCRE uses a  default  set  of
       character  tables  that  are  built  when  PCRE  is compiled, using the
       default C locale. Otherwise, tableptr must be an address  that  is  the
       result  of  a  call to pcre_maketables(). This value is stored with the
       compiled pattern, and used again by pcre_exec(), unless  another  table
       pointer is passed to it. For more discussion, see the section on locale
       support below.

       This code fragment shows a typical straightforward  call  to  pcre_com-
       pile():

         pcre *re;
         const char *error;
         int erroffset;
         re = pcre_compile(
           "^A.*Z",          /* the pattern */
           0,                /* default options */
           &error,           /* for error message */
           &erroffset,       /* for error offset */
           NULL);            /* use default character tables */

       The  following  names  for option bits are defined in the pcre.h header
       file:

         PCRE_ANCHORED

       If this bit is set, the pattern is forced to be "anchored", that is, it
       is  constrained to match only at the first matching point in the string
       that is being searched (the "subject string"). This effect can also  be
       achieved  by appropriate constructs in the pattern itself, which is the
       only way to do it in Perl.

         PCRE_AUTO_CALLOUT

       If this bit is set, pcre_compile() automatically inserts callout items,
       all  with  number  255, before each pattern item. For discussion of the
       callout facility, see the pcrecallout documentation.

         PCRE_BSR_ANYCRLF
         PCRE_BSR_UNICODE

       These options (which are mutually exclusive) control what the \R escape
       sequence  matches.  The choice is either to match only CR, LF, or CRLF,
       or to match any Unicode newline sequence. The default is specified when
       PCRE is built. It can be overridden from within the pattern, or by set-
       ting an option when a compiled pattern is matched.

         PCRE_CASELESS

       If this bit is set, letters in the pattern match both upper  and  lower
       case  letters.  It  is  equivalent  to  Perl's /i option, and it can be
       changed within a pattern by a (?i) option setting. In UTF-8 mode,  PCRE
       always  understands the concept of case for characters whose values are
       less than 128, so caseless matching is always possible. For  characters
       with  higher  values,  the concept of case is supported if PCRE is com-
       piled with Unicode property support, but not otherwise. If you want  to
       use  caseless  matching  for  characters 128 and above, you must ensure
       that PCRE is compiled with Unicode property support  as  well  as  with
       UTF-8 support.

         PCRE_DOLLAR_ENDONLY

       If  this bit is set, a dollar metacharacter in the pattern matches only
       at the end of the subject string. Without this option,  a  dollar  also
       matches  immediately before a newline at the end of the string (but not
       before any other newlines). The PCRE_DOLLAR_ENDONLY option  is  ignored
       if  PCRE_MULTILINE  is  set.   There is no equivalent to this option in
       Perl, and no way to set it within a pattern.

         PCRE_DOTALL

       If this bit is set, a dot metacharacter in the pattern matches all char-
       acters,  including  those that indicate newline. Without it, a dot does
       not match when the current position is at a  newline.  This  option  is
       equivalent  to Perl's /s option, and it can be changed within a pattern
       by a (?s) option setting. A negative class such as [^a] always  matches
       newline characters, independent of the setting of this option.

         PCRE_DUPNAMES

       If  this  bit is set, names used to identify capturing subpatterns need
       not be unique. This can be helpful for certain types of pattern when it
       is  known  that  only  one instance of the named subpattern can ever be
       matched. There are more details of named subpatterns  below;  see  also
       the pcrepattern documentation.

         PCRE_EXTENDED

       If  this  bit  is  set,  whitespace  data characters in the pattern are
       totally ignored except when escaped or inside a character class. White-
       space does not include the VT character (code 11). In addition, charac-
       ters between an unescaped # outside a character class and the next new-
       line,  inclusive,  are  also  ignored.  This is equivalent to Perl's /x
       option, and it can be changed within a pattern by a  (?x)  option  set-
       ting.

       This  option  makes  it possible to include comments inside complicated
       patterns.  Note, however, that this applies only  to  data  characters.
       Whitespace   characters  may  never  appear  within  special  character
       sequences in a pattern, for  example  within  the  sequence  (?(  which
       introduces a conditional subpattern.

         PCRE_EXTRA

       This  option  was invented in order to turn on additional functionality
       of PCRE that is incompatible with Perl, but it  is  currently  of  very
       little  use. When set, any backslash in a pattern that is followed by a
       letter that has no special meaning  causes  an  error,  thus  reserving
       these  combinations  for  future  expansion.  By default, as in Perl, a
       backslash followed by a letter with no special meaning is treated as  a
       literal.  (Perl can, however, be persuaded to give a warning for this.)
       There are at present no other features controlled by  this  option.  It
       can also be set by a (?X) option setting within a pattern.

         PCRE_FIRSTLINE

       If  this  option  is  set,  an  unanchored pattern is required to match
       before or at the first  newline  in  the  subject  string,  though  the
       matched text may continue over the newline.

         PCRE_JAVASCRIPT_COMPAT

       If this option is set, PCRE's behaviour is changed in some ways so that
       it is compatible with JavaScript rather than Perl. The changes  are  as
       follows:

       (1)  A  lone  closing square bracket in a pattern causes a compile-time
       error, because this is illegal in JavaScript (by default it is  treated
       as a data character). Thus, the pattern AB]CD becomes illegal when this
       option is set.

       (2) At run time, a back reference to an unset subpattern group  matches
       an  empty  string (by default this causes the current matching alterna-
       tive to fail). A pattern such as (\1)(a) succeeds when this  option  is
       set  (assuming  it can find an "a" in the subject), whereas it fails by
       default, for Perl compatibility.

         PCRE_MULTILINE

       By default, PCRE treats the subject string as consisting  of  a  single
       line  of characters (even if it actually contains newlines). The "start
       of line" metacharacter (^) matches only at the  start  of  the  string,
       while  the  "end  of line" metacharacter ($) matches only at the end of
       the string, or before a terminating newline (unless PCRE_DOLLAR_ENDONLY
       is set). This is the same as Perl.

       When  PCRE_MULTILINE  is  set,  the "start of line" and "end of line"
       constructs match immediately following or immediately  before  internal
       newlines  in  the  subject string, respectively, as well as at the very
       start and end. This is equivalent to Perl's /m option, and  it  can  be
       changed within a pattern by a (?m) option setting. If there are no new-
       lines in a subject string, or no occurrences of ^ or $  in  a  pattern,
       setting PCRE_MULTILINE has no effect.

         PCRE_NEWLINE_CR
         PCRE_NEWLINE_LF
         PCRE_NEWLINE_CRLF
         PCRE_NEWLINE_ANYCRLF
         PCRE_NEWLINE_ANY

       These  options  override the default newline definition that was chosen
       when PCRE was built. Setting the first or the second specifies  that  a
       newline  is  indicated  by a single character (CR or LF, respectively).
       Setting PCRE_NEWLINE_CRLF specifies that a newline is indicated by  the
       two-character  CRLF  sequence.  Setting  PCRE_NEWLINE_ANYCRLF specifies
       that any of the three preceding sequences should be recognized. Setting
       PCRE_NEWLINE_ANY  specifies that any Unicode newline sequence should be
       recognized. The Unicode newline sequences are the three just mentioned,
       plus  the  single  characters  VT (vertical tab, U+000B), FF (formfeed,
       U+000C), NEL (next line, U+0085), LS (line separator, U+2028),  and  PS
       (paragraph  separator,  U+2029).  The  last  two are recognized only in
       UTF-8 mode.

       The newline setting in the  options  word  uses  three  bits  that  are
       treated as a number, giving eight possibilities. Currently only six are
       used (default plus the five values above). This means that if  you  set
       more  than one newline option, the combination may or may not be sensi-
       ble. For example, PCRE_NEWLINE_CR with PCRE_NEWLINE_LF is equivalent to
       PCRE_NEWLINE_CRLF,  but other combinations may yield unused numbers and
       cause an error.

       The only time that a line break is specially recognized when  compiling
       a  pattern  is  if  PCRE_EXTENDED  is set, and an unescaped # outside a
       character class is encountered. This indicates  a  comment  that  lasts
       until  after the next line break sequence. In other circumstances, line
       break  sequences  are  treated  as  literal  data,   except   that   in
       PCRE_EXTENDED mode, both CR and LF are treated as whitespace characters
       and are therefore ignored.

       The newline option that is set at compile time becomes the default that
       is used for pcre_exec() and pcre_dfa_exec(), but it can be overridden.

         PCRE_NO_AUTO_CAPTURE

       If this option is set, it disables the use of numbered capturing paren-
       theses in the pattern. Any opening parenthesis that is not followed  by
       ?  behaves as if it were followed by ?: but named parentheses can still
       be used for capturing (and they acquire  numbers  in  the  usual  way).
       There is no equivalent of this option in Perl.

         PCRE_UNGREEDY

       This  option  inverts  the "greediness" of the quantifiers so that they
       are not greedy by default, but become greedy if followed by "?". It  is
       not  compatible  with Perl. It can also be set by a (?U) option setting
       within the pattern.

         PCRE_UTF8

       This option causes PCRE to regard both the pattern and the  subject  as
       strings  of  UTF-8 characters instead of single-byte character strings.
       However, it is available only when PCRE is built to include UTF-8  sup-
       port.  If not, the use of this option provokes an error. Details of how
       this option changes the behaviour of PCRE are given in the  section  on
       UTF-8 support in the main pcre page.

         PCRE_NO_UTF8_CHECK

       When PCRE_UTF8 is set, the validity of the pattern as a UTF-8 string is
       automatically checked. There is a  discussion  about  the  validity  of
       UTF-8  strings  in  the main pcre page. If an invalid UTF-8 sequence of
       bytes is found, pcre_compile() returns an error. If  you  already  know
       that your pattern is valid, and you want to skip this check for perfor-
       mance reasons, you can set the PCRE_NO_UTF8_CHECK option.  When  it  is
       set,  the  effect  of  passing  an invalid UTF-8 string as a pattern is
       undefined. It may cause your program to crash. Note  that  this  option
       can  also be passed to pcre_exec() and pcre_dfa_exec(), to suppress the
       UTF-8 validity checking of subject strings.


COMPILATION ERROR CODES

       The following table lists the error  codes  that  may  be  returned  by
       pcre_compile2(),  along with the error messages that may be returned by
       both compiling functions. As PCRE has developed, some error codes  have
       fallen out of use. To avoid confusion, they have not been re-used.

          0  no error
          1  \ at end of pattern
          2  \c at end of pattern
          3  unrecognized character follows \
          4  numbers out of order in {} quantifier
          5  number too big in {} quantifier
          6  missing terminating ] for character class
          7  invalid escape sequence in character class
          8  range out of order in character class
          9  nothing to repeat
         10  [this code is not in use]
         11  internal error: unexpected repeat
         12  unrecognized character after (? or (?-
         13  POSIX named classes are supported only within a class
         14  missing )
         15  reference to non-existent subpattern
         16  erroffset passed as NULL
         17  unknown option bit(s) set
         18  missing ) after comment
         19  [this code is not in use]
         20  regular expression is too large
         21  failed to get memory
         22  unmatched parentheses
         23  internal error: code overflow
         24  unrecognized character after (?<
         25  lookbehind assertion is not fixed length
         26  malformed number or name after (?(
         27  conditional group contains more than two branches
         28  assertion expected after (?(
         29  (?R or (?[+-]digits must be followed by )
         30  unknown POSIX class name
         31  POSIX collating elements are not supported
         32  this version of PCRE is not compiled with PCRE_UTF8 support
         33  [this code is not in use]
         34  character value in \x{...} sequence is too large
         35  invalid condition (?(0)
         36  \C not allowed in lookbehind assertion
         37  PCRE does not support \L, \l, \N, \U, or \u
         38  number after (?C is > 255
         39  closing ) for (?C expected
         40  recursive call could loop indefinitely
         41  unrecognized character after (?P
         42  syntax error in subpattern name (missing terminator)
         43  two named subpatterns have the same name
         44  invalid UTF-8 string
         45  support for \P, \p, and \X has not been compiled
         46  malformed \P or \p sequence
         47  unknown property name after \P or \p
         48  subpattern name is too long (maximum 32 characters)
         49  too many named subpatterns (maximum 10000)
         50  [this code is not in use]
         51  octal value is greater than \377 (not in UTF-8 mode)
         52  internal error: overran compiling workspace
         53  internal error: previously-checked referenced subpattern
               not found
         54  DEFINE group contains more than one branch
         55  repeating a DEFINE group is not allowed
         56  inconsistent NEWLINE options
         57  \g is not followed by a braced, angle-bracketed, or quoted
               name/number or by a plain number
         58  a numbered reference must not be zero
         59  (*VERB) with an argument is not supported
         60  (*VERB) not recognized
         61  number is too big
         62  subpattern name expected
         63  digit expected after (?+
         64  ] is an invalid data character in JavaScript compatibility mode

       The numbers 32 and 10000 in errors 48 and 49  are  defaults;  different
       values may be used if the limits were changed when PCRE was built.


STUDYING A PATTERN

       pcre_extra *pcre_study(const pcre *code, int options,
            const char **errptr);

       If  a  compiled  pattern is going to be used several times, it is worth
       spending more time analyzing it in order to speed up the time taken for
       matching.  The function pcre_study() takes a pointer to a compiled pat-
       tern as its first argument. If studying the pattern produces additional
       information  that  will  help speed up matching, pcre_study() returns a
       pointer to a pcre_extra block, in which the study_data field points  to
       the results of the study.

       The  returned  value  from  pcre_study()  can  be  passed  directly  to
       pcre_exec() or pcre_dfa_exec(). However, a pcre_extra block  also  con-
       tains  other  fields  that can be set by the caller before the block is
       passed; these are described below in the section on matching a pattern.

       If studying the  pattern  does  not  produce  any  useful  information,
       pcre_study() returns NULL. In that circumstance, if the calling program
       wants  to  pass  any  of   the   other   fields   to   pcre_exec()   or
       pcre_dfa_exec(), it must set up its own pcre_extra block.

       The  second  argument of pcre_study() contains option bits. At present,
       no options are defined, and this argument should always be zero.

       The third argument for pcre_study() is a pointer for an error  message.
       If  studying  succeeds  (even  if no data is returned), the variable it
       points to is set to NULL. Otherwise it is set to  point  to  a  textual
       error message. This is a static string that is part of the library. You
       must not try to free it. You should test the  error  pointer  for  NULL
       after calling pcre_study(), to be sure that it has run successfully.

       This is a typical call to pcre_study():

         pcre_extra *pe;
         pe = pcre_study(
           re,             /* result of pcre_compile() */
           0,              /* no options exist */
           &error);        /* set to NULL or points to a message */

       Studying a pattern does two things: first, a lower bound for the length
       of subject string that is needed to match the pattern is computed. This
       does not mean that there are any strings of that length that match, but
       it does guarantee that no shorter strings match. The value is  used  by
       pcre_exec()  and  pcre_dfa_exec()  to  avoid  wasting time by trying to
       match strings that are shorter than the lower bound. You can  find  out
       the value in a calling program via the pcre_fullinfo() function.

       Studying a pattern is also useful for non-anchored patterns that do not
       have a single fixed starting character. A bitmap of  possible  starting
       bytes  is  created. This speeds up finding a position in the subject at
       which to start matching.


LOCALE SUPPORT

       PCRE handles caseless matching, and determines whether  characters  are
       letters,  digits, or whatever, by reference to a set of tables, indexed
       by character value. When running in UTF-8 mode, this  applies  only  to
       characters  with  codes  less than 128. Higher-valued codes never match
       escapes such as \w or \d, but can be tested with \p if  PCRE  is  built
       with  Unicode  character property support. The use of locales with Uni-
       code is discouraged. If you are handling characters with codes  greater
       than  128, you should either use UTF-8 and Unicode, or use locales, but
       not try to mix the two.

       PCRE contains an internal set of tables that are used  when  the  final
       argument  of  pcre_compile()  is  NULL.  These  are sufficient for many
       applications.  Normally, the internal tables recognize only ASCII char-
       acters. However, when PCRE is built, it is possible to cause the inter-
       nal tables to be rebuilt in the default "C" locale of the local system,
       which may cause them to be different.

       The  internal tables can always be overridden by tables supplied by the
       application that calls PCRE. These may be created in a different locale
       from  the  default.  As more and more applications change to using Uni-
       code, the need for this locale support is expected to die away.

       External tables are built by calling  the  pcre_maketables()  function,
       which  has no arguments, in the relevant locale. The result can then be
       passed to pcre_compile() or pcre_exec()  as  often  as  necessary.  For
       example,  to  build  and use tables that are appropriate for the French
       locale (where accented characters with  values  greater  than  128  are
       treated as letters), the following code could be used:

         setlocale(LC_CTYPE, "fr_FR");
         tables = pcre_maketables();
         re = pcre_compile(..., tables);

       The  locale  name "fr_FR" is used on Linux and other Unix-like systems;
       if you are using Windows, the name for the French locale is "french".

       When pcre_maketables() runs, the tables are built  in  memory  that  is
       obtained  via  pcre_malloc. It is the caller's responsibility to ensure
       that the memory containing the tables remains available for as long  as
       it is needed.

       The pointer that is passed to pcre_compile() is saved with the compiled
       pattern, and the same tables are used via this pointer by  pcre_study()
       and normally also by pcre_exec(). Thus, by default, for any single pat-
       tern, compilation, studying and matching all happen in the same locale,
       but different patterns can be compiled in different locales.

       It  is  possible to pass a table pointer or NULL (indicating the use of
       the internal tables) to pcre_exec(). Although  not  intended  for  this
       purpose,  this facility could be used to match a pattern in a different
       locale from the one in which it was compiled. Passing table pointers at
       run time is discussed below in the section on matching a pattern.


INFORMATION ABOUT A PATTERN

       int pcre_fullinfo(const pcre *code, const pcre_extra *extra,
            int what, void *where);

       The  pcre_fullinfo() function returns information about a compiled pat-
       tern. It replaces the obsolete pcre_info() function, which is neverthe-
       less retained for backwards compatibility (and is documented below).

       The  first  argument  for  pcre_fullinfo() is a pointer to the compiled
       pattern. The second argument is the result of pcre_study(), or NULL  if
       the  pattern  was not studied. The third argument specifies which piece
       of information is required, and the fourth argument is a pointer  to  a
       variable  to  receive  the  data. The yield of the function is zero for
       success, or one of the following negative numbers:

         PCRE_ERROR_NULL       the argument code was NULL
                               the argument where was NULL
         PCRE_ERROR_BADMAGIC   the "magic number" was not found
         PCRE_ERROR_BADOPTION  the value of what was invalid

       The "magic number" is placed at the start of each compiled  pattern  as
       a  simple  check against passing an arbitrary memory pointer. Here is a
       typical call of pcre_fullinfo(), to obtain the length of  the  compiled
       pattern:

         int rc;
         size_t length;
         rc = pcre_fullinfo(
           re,               /* result of pcre_compile() */
           pe,               /* result of pcre_study(), or NULL */
           PCRE_INFO_SIZE,   /* what is required */
           &length);         /* where to put the data */

       The  possible  values for the third argument are defined in pcre.h, and
       are as follows:

         PCRE_INFO_BACKREFMAX

       Return the number of the highest back reference  in  the  pattern.  The
       fourth  argument  should  point to an int variable. Zero is returned if
       there are no back references.

         PCRE_INFO_CAPTURECOUNT

       Return the number of capturing subpatterns in the pattern.  The  fourth
       argument should point to an int variable.

         PCRE_INFO_DEFAULT_TABLES

       Return  a pointer to the internal default character tables within PCRE.
       The fourth argument should point to an unsigned char *  variable.  This
       information call is provided for internal use by the pcre_study() func-
       tion. External callers can cause PCRE to use  its  internal  tables  by
       passing a NULL table pointer.

         PCRE_INFO_FIRSTBYTE

       Return  information  about  the first byte of any matched string, for a
       non-anchored pattern. The fourth argument should point to an int  vari-
       able.  (This option used to be called PCRE_INFO_FIRSTCHAR; the old name
       is still recognized for backwards compatibility.)

       If there is a fixed first byte, for example, from  a  pattern  such  as
       (cat|cow|coyote), its value is returned. Otherwise, if either

       (a)  the pattern was compiled with the PCRE_MULTILINE option, and every
       branch starts with "^", or

       (b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not
       set (if it were set, the pattern would be anchored),

       -1  is  returned, indicating that the pattern matches only at the start
       of a subject string or after any newline within the  string.  Otherwise
       -2 is returned. For anchored patterns, -2 is returned.

         PCRE_INFO_FIRSTTABLE

       If  the pattern was studied, and this resulted in the construction of a
       256-bit table indicating a fixed set of bytes for the first byte in any
       matching  string, a pointer to the table is returned. Otherwise NULL is
       returned. The fourth argument should point to an unsigned char *  vari-
       able.

         PCRE_INFO_HASCRORLF

       Return  1  if  the  pattern  contains any explicit matches for CR or LF
       characters, otherwise 0. The fourth argument should  point  to  an  int
       variable.  An explicit match is either a literal CR or LF character, or
       \r or \n.

         PCRE_INFO_JCHANGED

       Return 1 if the (?J) or (?-J) option setting is used  in  the  pattern,
       otherwise  0. The fourth argument should point to an int variable. (?J)
       and (?-J) set and unset the local PCRE_DUPNAMES option, respectively.

         PCRE_INFO_LASTLITERAL

       Return the value of the rightmost literal byte that must exist  in  any
       matched  string,  other  than  at  its  start,  if such a byte has been
       recorded. The fourth argument should point to an int variable. If there
       is  no such byte, -1 is returned. For anchored patterns, a last literal
       byte is recorded only if it follows something of variable  length.  For
       example, for the pattern /^a\d+z\d+/ the returned value is "z", but for
       /^a\dz\d/ the returned value is -1.

         PCRE_INFO_MINLENGTH

       If the pattern was studied and a minimum length  for  matching  subject
       strings  was  computed,  its  value is returned. Otherwise the returned
       value is -1. The value is a number of characters, not bytes  (this  may
       be  relevant in UTF-8 mode). The fourth argument should point to an int
       variable. A non-negative value is a lower bound to the  length  of  any
       matching  string.  There  may not be any strings of that length that do
       actually match, but every string that does match is at least that long.

         PCRE_INFO_NAMECOUNT
         PCRE_INFO_NAMEENTRYSIZE
         PCRE_INFO_NAMETABLE

       PCRE supports the use of named as well as numbered capturing  parenthe-
       ses.  The names are just an additional way of identifying the parenthe-
       ses, which still acquire numbers. Several convenience functions such as
       pcre_get_named_substring()  are  provided  for extracting captured sub-
       strings by name. It is also possible to extract the data  directly,  by
       first  converting  the  name to a number in order to access the correct
       pointers in the output vector (described with pcre_exec() below). To do
       the  conversion,  you  need  to  use  the  name-to-number map, which is
       described by these three values.

       The map consists of a number of fixed-size entries. PCRE_INFO_NAMECOUNT
       gives the number of entries, and PCRE_INFO_NAMEENTRYSIZE gives the size
       of each entry; both of these  return  an  int  value.  The  entry  size
       depends  on the length of the longest name. PCRE_INFO_NAMETABLE returns
       a pointer to the first entry of the table  (a  pointer  to  char).  The
       first two bytes of each entry are the number of the capturing parenthe-
       sis, most significant byte first. The rest of the entry is  the  corre-
       sponding name, zero terminated.

       The  names are in alphabetical order. Duplicate names may appear if (?|
       is used to create multiple groups with the same number, as described in
       the  section  on  duplicate subpattern numbers in the pcrepattern page.
       Duplicate names for subpatterns with different  numbers  are  permitted
       only  if  PCRE_DUPNAMES  is  set. In all cases of duplicate names, they
       appear in the table in the order in which they were found in  the  pat-
       tern.  In  the  absence  of (?| this is the order of increasing number;
       when (?| is used this is not necessarily the case because later subpat-
       terns may have lower numbers.

       As  a  simple  example of the name/number table, consider the following
       pattern (assume PCRE_EXTENDED is set, so white space -  including  new-
       lines - is ignored):

         (?<date> (?<year>(\d\d)?\d\d) -
         (?<month>\d\d) - (?<day>\d\d) )

       There  are  four  named subpatterns, so the table has four entries, and
       each entry in the table is eight bytes long. The table is  as  follows,
       with non-printing bytes shown in hexadecimal, and undefined bytes shown
       as ??:

         00 01 d  a  t  e  00 ??
         00 05 d  a  y  00 ?? ??
         00 04 m  o  n  t  h  00
         00 02 y  e  a  r  00 ??

       When writing code to extract data  from  named  subpatterns  using  the
       name-to-number  map,  remember that the length of the entries is likely
       to be different for each compiled pattern.

         PCRE_INFO_OKPARTIAL

       Return 1  if  the  pattern  can  be  used  for  partial  matching  with
       pcre_exec(),  otherwise  0.  The fourth argument should point to an int
       variable. From  release  8.00,  this  always  returns  1,  because  the
       restrictions  that  previously  applied  to  partial matching have been
       lifted. The pcrepartial documentation gives details of  partial  match-
       ing.

         PCRE_INFO_OPTIONS

       Return  a  copy of the options with which the pattern was compiled. The
       fourth argument should point to an unsigned long  int  variable.  These
       option bits are those specified in the call to pcre_compile(), modified
       by any top-level option settings at the start of the pattern itself. In
       other  words,  they are the options that will be in force when matching
       starts. For example, if the pattern /(?im)abc(?-i)d/ is  compiled  with
       the  PCRE_EXTENDED option, the result is PCRE_CASELESS, PCRE_MULTILINE,
       and PCRE_EXTENDED.

       A pattern is automatically anchored by PCRE if  all  of  its  top-level
       alternatives begin with one of the following:

         ^     unless PCRE_MULTILINE is set
         \A    always
         \G    always
         .*    if PCRE_DOTALL is set and there are no back
                 references to the subpattern in which .* appears

       For such patterns, the PCRE_ANCHORED bit is set in the options returned
       by pcre_fullinfo().

         PCRE_INFO_SIZE

       Return the size of the compiled pattern, that is, the  value  that  was
       passed as the argument to pcre_malloc() when PCRE was getting memory in
       which to place the compiled data. The fourth argument should point to a
       size_t variable.

         PCRE_INFO_STUDYSIZE

       Return the size of the data block pointed to by the study_data field in
       a pcre_extra block. That is,  it  is  the  value  that  was  passed  to
       pcre_malloc() when PCRE was getting memory into which to place the data
       created by pcre_study(). If pcre_extra is NULL, or there  is  no  study
       data,  zero  is  returned. The fourth argument should point to a size_t
       variable.


OBSOLETE INFO FUNCTION

       int pcre_info(const pcre *code, int *optptr, int *firstcharptr);

       The pcre_info() function is now obsolete because its interface  is  too
       restrictive  to return all the available data about a compiled pattern.
       New  programs  should  use  pcre_fullinfo()  instead.  The   yield   of
       pcre_info()  is the number of capturing subpatterns, or one of the fol-
       lowing negative numbers:

         PCRE_ERROR_NULL       the argument code was NULL
         PCRE_ERROR_BADMAGIC   the "magic number" was not found

       If the optptr argument is not NULL, a copy of the  options  with  which
       the  pattern  was  compiled  is placed in the integer it points to (see
       PCRE_INFO_OPTIONS above).

       If the pattern is not anchored and the  firstcharptr  argument  is  not
       NULL,  it is used to pass back information about the first character of
       any matched string (see PCRE_INFO_FIRSTBYTE above).


REFERENCE COUNTS

       int pcre_refcount(pcre *code, int adjust);

       The pcre_refcount() function is used to maintain a reference  count  in
       the data block that contains a compiled pattern. It is provided for the
       benefit of applications that  operate  in  an  object-oriented  manner,
       where different parts of the application may be using the same compiled
       pattern, but you want to free the block when they are all done.

       When a pattern is compiled, the reference count field is initialized to
       zero.   It is changed only by calling this function, whose action is to
       add the adjust value (which may be positive or  negative)  to  it.  The
       yield of the function is the new value. However, the value of the count
       is constrained to lie between 0 and 65535, inclusive. If the new  value
       is outside these limits, it is forced to the appropriate limit value.

       Except  when it is zero, the reference count is not correctly preserved
       if a pattern is compiled on one host and then  transferred  to  a  host
       whose byte-order is different. (This seems a highly unlikely scenario.)


MATCHING A PATTERN: THE TRADITIONAL FUNCTION

       int pcre_exec(const pcre *code, const pcre_extra *extra,
            const char *subject, int length, int startoffset,
            int options, int *ovector, int ovecsize);

       The  function pcre_exec() is called to match a subject string against a
       compiled pattern, which is passed in the code argument. If the  pattern
       was  studied,  the  result  of  the study should be passed in the extra
       argument. This function is the main matching facility of  the  library,
       and it operates in a Perl-like manner. For specialist use there is also
       an alternative matching function, which is described below in the  sec-
       tion about the pcre_dfa_exec() function.

       In  most applications, the pattern will have been compiled (and option-
       ally studied) in the same process that calls pcre_exec().  However,  it
       is possible to save compiled patterns and study data, and then use them
       later in different processes, possibly even on different hosts.  For  a
       discussion about this, see the pcreprecompile documentation.

       Here is an example of a simple call to pcre_exec():

         int rc;
         int ovector[30];
         rc = pcre_exec(
           re,             /* result of pcre_compile() */
           NULL,           /* we didn't study the pattern */
           "some string",  /* the subject string */
           11,             /* the length of the subject string */
           0,              /* start at offset 0 in the subject */
           0,              /* default options */
           ovector,        /* vector of integers for substring information */
           30);            /* number of elements (NOT size in bytes) */

   Extra data for pcre_exec()

       If  the  extra argument is not NULL, it must point to a pcre_extra data
       block. The pcre_study() function returns such a block (when it  doesn't
       return  NULL), but you can also create one for yourself, and pass addi-
       tional information in it. The pcre_extra block contains  the  following
       fields (not necessarily in this order):

         unsigned long int flags;
         void *study_data;
         unsigned long int match_limit;
         unsigned long int match_limit_recursion;
         void *callout_data;
         const unsigned char *tables;

       The  flags  field  is a bitmap that specifies which of the other fields
       are set. The flag bits are:

         PCRE_EXTRA_STUDY_DATA
         PCRE_EXTRA_MATCH_LIMIT
         PCRE_EXTRA_MATCH_LIMIT_RECURSION
         PCRE_EXTRA_CALLOUT_DATA
         PCRE_EXTRA_TABLES

       Other flag bits should be set to zero. The study_data field is  set  in
       the  pcre_extra  block  that is returned by pcre_study(), together with
       the appropriate flag bit. You should not set this yourself, but you may
       add  to  the  block by setting the other fields and their corresponding
       flag bits.

       The match_limit field provides a means of preventing PCRE from using up
       a  vast amount of resources when running patterns that are not going to
       match, but which have a very large number  of  possibilities  in  their
       search  trees. The classic example is a pattern that uses nested unlim-
       ited repeats.

       Internally, PCRE uses a function called match() which it calls  repeat-
       edly  (sometimes  recursively). The limit set by match_limit is imposed
       on the number of times this function is called during  a  match,  which
       has  the  effect  of  limiting the amount of backtracking that can take
       place. For patterns that are not anchored, the count restarts from zero
       for each position in the subject string.

       The  default  value  for  the  limit can be set when PCRE is built; the
       default default is 10 million, which handles all but the  most  extreme
       cases.  You  can  override  the default by supplying pcre_exec() with a
       pcre_extra    block    in    which    match_limit    is    set,     and
       PCRE_EXTRA_MATCH_LIMIT  is  set  in  the  flags  field. If the limit is
       exceeded, pcre_exec() returns PCRE_ERROR_MATCHLIMIT.

       The match_limit_recursion field is similar to match_limit, but  instead
       of limiting the total number of times that match() is called, it limits
       the depth of recursion. The recursion depth is a  smaller  number  than
       the  total number of calls, because not all calls to match() are recur-
       sive.  This limit is of use only if it is set smaller than match_limit.

       Limiting the recursion depth limits the amount of  stack  that  can  be
       used, or, when PCRE has been compiled to use memory on the heap instead
       of the stack, the amount of heap memory that can be used.

       The default value for match_limit_recursion can be  set  when  PCRE  is
       built;  the  default  default  is  the  same  value  as the default for
       match_limit. You can override the default by supplying pcre_exec() with
       a   pcre_extra   block  in  which  match_limit_recursion  is  set,  and
       PCRE_EXTRA_MATCH_LIMIT_RECURSION is set in  the  flags  field.  If  the
       limit is exceeded, pcre_exec() returns PCRE_ERROR_RECURSIONLIMIT.

       The  callout_data  field is used in conjunction with the "callout" fea-
       ture, and is described in the pcrecallout documentation.

       The tables field  is  used  to  pass  a  character  tables  pointer  to
       pcre_exec();  this overrides the value that is stored with the compiled
       pattern. A non-NULL value is stored with the compiled pattern  only  if
       custom  tables  were  supplied to pcre_compile() via its tableptr argu-
       ment.  If NULL is passed to pcre_exec() using this mechanism, it forces
       PCRE's  internal  tables  to be used. This facility is helpful when re-
       using patterns that have been saved after compiling  with  an  external
       set  of  tables,  because  the  external tables might be at a different
       address when pcre_exec() is called. See the  pcreprecompile  documenta-
       tion for a discussion of saving compiled patterns for later use.

   Option bits for pcre_exec()

       The  unused  bits of the options argument for pcre_exec() must be zero.
       The  only  bits  that  may  be  set  are  PCRE_ANCHORED,  PCRE_BSR_ANY-
       CRLF,  PCRE_BSR_UNICODE,  PCRE_NEWLINE_xxx,  PCRE_NOTBOL,  PCRE_NOTEOL,
       PCRE_NOTEMPTY,    PCRE_NOTEMPTY_ATSTART,        PCRE_NO_START_OPTIMIZE,
       PCRE_NO_UTF8_CHECK, PCRE_PARTIAL_SOFT, and PCRE_PARTIAL_HARD.

         PCRE_ANCHORED

       The  PCRE_ANCHORED  option  limits pcre_exec() to matching at the first
       matching position. If a pattern was  compiled  with  PCRE_ANCHORED,  or
       turned  out to be anchored by virtue of its contents, it cannot be made
       unanchored at matching time.

         PCRE_BSR_ANYCRLF
         PCRE_BSR_UNICODE

       These options (which are mutually exclusive) control what the \R escape
       sequence  matches.  The choice is either to match only CR, LF, or CRLF,
       or to match any Unicode newline sequence. These  options  override  the
       choice that was made or defaulted when the pattern was compiled.

         PCRE_NEWLINE_CR
         PCRE_NEWLINE_LF
         PCRE_NEWLINE_CRLF
         PCRE_NEWLINE_ANYCRLF
         PCRE_NEWLINE_ANY

       These  options  override  the  newline  definition  that  was chosen or
       defaulted when the pattern was compiled. For details, see the  descrip-
       tion  of  pcre_compile()  above.  During  matching,  the newline choice
       affects the behaviour of the dot, circumflex,  and  dollar  metacharac-
       ters.  It may also alter the way the match position is advanced after a
       match failure for an unanchored pattern.

       When PCRE_NEWLINE_CRLF, PCRE_NEWLINE_ANYCRLF,  or  PCRE_NEWLINE_ANY  is
       set,  and a match attempt for an unanchored pattern fails when the cur-
       rent position is at a  CRLF  sequence,  and  the  pattern  contains  no
       explicit  matches  for  CR  or  LF  characters,  the  match position is
       advanced by two characters instead of one, in other words, to after the
       CRLF.

       The above rule is a compromise that makes the most common cases work as
       expected. For example, if the  pattern  is  .+A  (and  the  PCRE_DOTALL
       option is not set), it does not match the string "\r\nA" because, after
       failing at the start, it skips both the CR and the LF before  retrying.
       However,  the  pattern  [\r\n]A does match that string, because it con-
       tains an explicit CR or LF reference, and so advances only by one char-
       acter after the first failure.

       An explicit match for CR or LF is either a literal appearance of one of
       those characters, or one of the \r or  \n  escape  sequences.  Implicit
       matches  such  as [^X] do not count, nor does \s (which includes CR and
       LF in the characters that it matches).

       Notwithstanding the above, anomalous effects may still occur when  CRLF
       is a valid newline sequence and explicit \r or \n escapes appear in the
       pattern.

         PCRE_NOTBOL

       This option specifies that the first character of the subject string is not
       the  beginning  of  a  line, so the circumflex metacharacter should not
       match before it. Setting this without PCRE_MULTILINE (at compile  time)
       causes  circumflex  never to match. This option affects only the behav-
       iour of the circumflex metacharacter. It does not affect \A.

         PCRE_NOTEOL

       This option specifies that the end of the subject string is not the end
       of  a line, so the dollar metacharacter should not match it nor (except
       in multiline mode) a newline immediately before it. Setting this  with-
       out PCRE_MULTILINE (at compile time) causes dollar never to match. This
       option affects only the behaviour of the dollar metacharacter. It  does
       not affect \Z or \z.

         PCRE_NOTEMPTY

       An empty string is not considered to be a valid match if this option is
       set. If there are alternatives in the pattern, they are tried.  If  all
       the  alternatives  match  the empty string, the entire match fails. For
       example, if the pattern

         a?b?

       is applied to a string not beginning with "a" or  "b",  it  matches  an
       empty  string at the start of the subject. With PCRE_NOTEMPTY set, this
       match is not valid, so PCRE searches further into the string for occur-
       rences of "a" or "b".

         PCRE_NOTEMPTY_ATSTART

       This  is  like PCRE_NOTEMPTY, except that an empty string match that is
       not at the start of  the  subject  is  permitted.  If  the  pattern  is
       anchored, such a match can occur only if the pattern contains \K.

       Perl     has    no    direct    equivalent    of    PCRE_NOTEMPTY    or
       PCRE_NOTEMPTY_ATSTART, but it does make a special  case  of  a  pattern
       match  of  the empty string within its split() function, and when using
       the /g modifier. It is  possible  to  emulate  Perl's  behaviour  after
       matching a null string by first trying the match again at the same off-
       set with PCRE_NOTEMPTY_ATSTART and  PCRE_ANCHORED,  and  then  if  that
       fails, by advancing the starting offset (see below) and trying an ordi-
       nary match again. There is some code that demonstrates how to  do  this
       in the pcredemo sample program.

         PCRE_NO_START_OPTIMIZE

       There  are a number of optimizations that pcre_exec() uses at the start
       of a match, in order to speed up the process. For  example,  if  it  is
       known  that  a  match must start with a specific character, it searches
       the subject for that character, and fails immediately if it cannot find
       it,  without actually running the main matching function. When callouts
       are in use, these optimizations can cause  them  to  be  skipped.  This
       option  disables  the  "start-up" optimizations, causing performance to
       suffer, but ensuring that the callouts do occur.

         PCRE_NO_UTF8_CHECK

       When PCRE_UTF8 is set at compile time, the validity of the subject as a
       UTF-8  string is automatically checked when pcre_exec() is subsequently
       called.  The value of startoffset is also checked  to  ensure  that  it
       points  to  the start of a UTF-8 character. There is a discussion about
       the validity of UTF-8 strings in the section on UTF-8  support  in  the
       main  pcre  page.  If  an  invalid  UTF-8  sequence  of bytes is found,
       pcre_exec() returns the error PCRE_ERROR_BADUTF8. If  startoffset  con-
       tains an invalid value, PCRE_ERROR_BADUTF8_OFFSET is returned.

       If  you  already  know that your subject is valid, and you want to skip
       these   checks   for   performance   reasons,   you   can    set    the
       PCRE_NO_UTF8_CHECK  option  when calling pcre_exec(). You might want to
       do this for the second and subsequent calls to pcre_exec() if  you  are
       making  repeated  calls  to  find  all  the matches in a single subject
       string. However, you should be  sure  that  the  value  of  startoffset
       points  to  the  start of a UTF-8 character. When PCRE_NO_UTF8_CHECK is
       set, the effect of passing an invalid UTF-8 string as a subject,  or  a
       value  of startoffset that does not point to the start of a UTF-8 char-
       acter, is undefined. Your program may crash.

         PCRE_PARTIAL_HARD
         PCRE_PARTIAL_SOFT

       These options turn on the partial matching feature. For backwards  com-
       patibility,  PCRE_PARTIAL is a synonym for PCRE_PARTIAL_SOFT. A partial
       match occurs if the end of the subject string is reached  successfully,
       but  there  are not enough subject characters to complete the match. If
       this happens when PCRE_PARTIAL_HARD  is  set,  pcre_exec()  immediately
       returns  PCRE_ERROR_PARTIAL.  Otherwise,  if  PCRE_PARTIAL_SOFT is set,
       matching continues by testing any other alternatives. Only if they  all
       fail  is  PCRE_ERROR_PARTIAL  returned (instead of PCRE_ERROR_NOMATCH).
       The portion of the string that was inspected when the partial match was
       found  is  set  as  the first matching string. There is a more detailed
       discussion in the pcrepartial documentation.

   The string to be matched by pcre_exec()

       The subject string is passed to pcre_exec() as a pointer in subject,  a
       length (in bytes) in length, and a starting byte offset in startoffset.
       In UTF-8 mode, the byte offset must point to the start of a UTF-8 char-
       acter.  Unlike  the pattern string, the subject may contain binary zero
       bytes. When the starting offset is zero, the search for a match  starts
       at  the  beginning  of  the subject, and this is by far the most common
       case.

       A non-zero starting offset is useful when searching for  another  match
       in  the same subject by calling pcre_exec() again after a previous suc-
       cess.  Setting startoffset differs from just passing over  a  shortened
       string  and  setting  PCRE_NOTBOL  in the case of a pattern that begins
       with any kind of lookbehind. For example, consider the pattern

         \Biss\B

       which finds occurrences of "iss" in the middle of  words.  (\B  matches
       only  if  the  current position in the subject is not a word boundary.)
       When applied to the string "Mississippi" the first call to  pcre_exec()
       finds  the  first  occurrence. If pcre_exec() is called again with just
       the remainder of the subject,  namely  "issippi",  it  does  not match,
       because \B is always false at the start of the subject, which is deemed
       to be a word boundary. However, if pcre_exec()  is  passed  the  entire
       string again, but with startoffset set to 4, it finds the second occur-
       rence of "iss" because it is able to look behind the starting point  to
       discover that it is preceded by a letter.

       If  a  non-zero starting offset is passed when the pattern is anchored,
       one attempt to match at the given offset is made. This can only succeed
       if  the  pattern  does  not require the match to be at the start of the
       subject.

   How pcre_exec() returns captured substrings

       In general, a pattern matches a certain portion of the subject, and  in
       addition,  further  substrings  from  the  subject may be picked out by
       parts of the pattern. Following the usage  in  Jeffrey  Friedl's  book,
       this  is  called "capturing" in what follows, and the phrase "capturing
       subpattern" is used for a fragment of a pattern that picks out  a  sub-
       string.  PCRE  supports several other kinds of parenthesized subpattern
       that do not cause substrings to be captured.

       Captured substrings are returned to the caller via a vector of integers
       whose  address is passed in ovector. The number of elements in the vec-
       tor is passed in ovecsize, which must be a non-negative  number.  Note:
       this argument is NOT the size of ovector in bytes.

       The  first  two-thirds of the vector is used to pass back captured sub-
       strings, each substring using a pair of integers. The  remaining  third
       of  the  vector is used as workspace by pcre_exec() while matching cap-
       turing subpatterns, and is not available for passing back  information.
       The  number passed in ovecsize should always be a multiple of three. If
       it is not, it is rounded down.

       When a match is successful, information about  captured  substrings  is
       returned  in  pairs  of integers, starting at the beginning of ovector,
       and continuing up to two-thirds of its length at the  most.  The  first
       element  of  each pair is set to the byte offset of the first character
       in a substring, and the second is set to the byte offset of  the  first
       character  after  the end of a substring. Note: these values are always
       byte offsets, even in UTF-8 mode. They are not character counts.

       The first pair of integers, ovector[0]  and  ovector[1],  identify  the
       portion  of  the subject string matched by the entire pattern. The next
       pair is used for the first capturing subpattern, and so on.  The  value
       returned by pcre_exec() is one more than the highest numbered pair that
       has been set.  For example, if two substrings have been  captured,  the
       returned  value is 3. If there are no capturing subpatterns, the return
       value from a successful match is 1, indicating that just the first pair
       of offsets has been set.

       If a capturing subpattern is matched repeatedly, it is the last portion
       of the string that it matched that is returned.

       If the vector is too small to hold all the captured substring  offsets,
       it is used as far as possible (up to two-thirds of its length), and the
       function returns a value of zero. If the substring offsets are  not  of
       interest,  pcre_exec()  may  be  called with ovector passed as NULL and
       ovecsize as zero. However, if the pattern contains back references  and
       the  ovector is not big enough to remember the related substrings, PCRE
       has to get additional memory for use during matching. Thus it  is  usu-
       ally advisable to supply an ovector.

       The pcre_fullinfo() function can be used to find out how many capturing
       subpatterns there are in a compiled  pattern.  The  smallest  size  for
       ovector  that  will allow for n captured substrings, in addition to the
       offsets of the substring matched by the whole pattern, is (n+1)*3.

       It is possible for capturing subpattern number n+1 to match  some  part
       of the subject when subpattern n has not been used at all. For example,
       if the string "abc" is matched  against  the  pattern  (a|(z))(bc)  the
       return from the function is 4, and subpatterns 1 and 3 are matched, but
       2 is not. When this happens, both values in  the  offset  pairs  corre-
       sponding to unused subpatterns are set to -1.

       Offset  values  that correspond to unused subpatterns at the end of the
       expression are also set to -1. For example,  if  the  string  "abc"  is
       matched  against the pattern (abc)(x(yz)?)? subpatterns 2 and 3 are not
       matched. The return from the function is 2, because  the  highest  used
       capturing subpattern number is 1. However, you can refer to the offsets
       for the second and third capturing subpatterns if  you  wish  (assuming
       the vector is large enough, of course).

       Some  convenience  functions  are  provided for extracting the captured
       substrings as separate strings. These are described below.

   Error return values from pcre_exec()

       If pcre_exec() fails, it returns a negative number. The  following  are
       defined in the header file:

         PCRE_ERROR_NOMATCH        (-1)

       The subject string did not match the pattern.

         PCRE_ERROR_NULL           (-2)

       Either  code  or  subject  was  passed as NULL, or ovector was NULL and
       ovecsize was not zero.

         PCRE_ERROR_BADOPTION      (-3)

       An unrecognized bit was set in the options argument.

         PCRE_ERROR_BADMAGIC       (-4)

       PCRE stores a 4-byte "magic number" at the start of the compiled  code,
       to catch the case when it is passed a junk pointer and to detect when a
       pattern that was compiled in an environment of one endianness is run in
       an  environment  with the other endianness. This is the error that PCRE
       gives when the magic number is not present.

         PCRE_ERROR_UNKNOWN_OPCODE (-5)

       While running the pattern match, an unknown item was encountered in the
       compiled  pattern.  This  error  could be caused by a bug in PCRE or by
       overwriting of the compiled pattern.

         PCRE_ERROR_NOMEMORY       (-6)

       If a pattern contains back references, but the ovector that  is  passed
       to pcre_exec() is not big enough to remember the referenced substrings,
       PCRE gets a block of memory at the start of matching to  use  for  this
       purpose.  If the call via pcre_malloc() fails, this error is given. The
       memory is automatically freed at the end of matching.

         PCRE_ERROR_NOSUBSTRING    (-7)

       This error is used by the pcre_copy_substring(),  pcre_get_substring(),
       and  pcre_get_substring_list()  functions  (see  below).  It  is  never
       returned by pcre_exec().

         PCRE_ERROR_MATCHLIMIT     (-8)

       The backtracking limit, as specified by  the  match_limit  field  in  a
       pcre_extra  structure  (or  defaulted) was reached. See the description
       above.

         PCRE_ERROR_CALLOUT        (-9)

       This error is never generated by pcre_exec() itself. It is provided for
       use  by  callout functions that want to yield a distinctive error code.
       See the pcrecallout documentation for details.

         PCRE_ERROR_BADUTF8        (-10)

       A string that contains an invalid UTF-8 byte sequence was passed  as  a
       subject.

         PCRE_ERROR_BADUTF8_OFFSET (-11)

       The UTF-8 byte sequence that was passed as a subject was valid, but the
       value of startoffset did not point to the beginning of a UTF-8  charac-
       ter.

         PCRE_ERROR_PARTIAL        (-12)

       The  subject  string did not match, but it did match partially. See the
       pcrepartial documentation for details of partial matching.

         PCRE_ERROR_BADPARTIAL     (-13)

       This code is no longer in  use.  It  was  formerly  returned  when  the
       PCRE_PARTIAL  option  was used with a compiled pattern containing items
       that were  not  supported  for  partial  matching.  From  release  8.00
       onwards, there are no restrictions on partial matching.

         PCRE_ERROR_INTERNAL       (-14)

       An  unexpected  internal error has occurred. This error could be caused
       by a bug in PCRE or by overwriting of the compiled pattern.

         PCRE_ERROR_BADCOUNT       (-15)

       This error is given if the value of the ovecsize argument is negative.

         PCRE_ERROR_RECURSIONLIMIT (-21)

       The internal recursion limit, as specified by the match_limit_recursion
       field  in  a  pcre_extra  structure (or defaulted) was reached. See the
       description above.

         PCRE_ERROR_BADNEWLINE     (-23)

       An invalid combination of PCRE_NEWLINE_xxx options was given.

       Error numbers -16 to -20 and -22 are not used by pcre_exec().


EXTRACTING CAPTURED SUBSTRINGS BY NUMBER

       int pcre_copy_substring(const char *subject, int *ovector,
            int stringcount, int stringnumber, char *buffer,
            int buffersize);

       int pcre_get_substring(const char *subject, int *ovector,
            int stringcount, int stringnumber,
            const char **stringptr);

       int pcre_get_substring_list(const char *subject,
            int *ovector, int stringcount, const char ***listptr);

       Captured substrings can be  accessed  directly  by  using  the  offsets
       returned  by  pcre_exec()  in  ovector.  For convenience, the functions
       pcre_copy_substring(),    pcre_get_substring(),    and    pcre_get_sub-
       string_list()  are  provided for extracting captured substrings as new,
       separate, zero-terminated strings. These functions identify  substrings
       by  number.  The  next section describes functions for extracting named
       substrings.

       A substring that contains a binary zero is correctly extracted and  has
       a  further zero added on the end, but the result is not, of course, a C
       string.  However, you can process such a string  by  referring  to  the
       length  that  is  returned  by  pcre_copy_substring() and pcre_get_sub-
       string().  Unfortunately, the interface to pcre_get_substring_list() is
       not  adequate for handling strings containing binary zeros, because the
       end of the final string is not independently indicated.

       The first three arguments are the same for all  three  of  these  func-
       tions:  subject  is  the subject string that has just been successfully
       matched, ovector is a pointer to the vector of integer offsets that was
       passed to pcre_exec(), and stringcount is the number of substrings that
       were captured by the match, including the substring  that  matched  the
       entire regular expression. This is the value returned by pcre_exec() if
       it is greater than zero. If pcre_exec() returned zero, indicating  that
       it  ran out of space in ovector, the value passed as stringcount should
       be the number of elements in the vector divided by three.

       The functions pcre_copy_substring() and pcre_get_substring() extract  a
       single  substring,  whose  number  is given as stringnumber. A value of
       zero extracts the substring that matched the  entire  pattern,  whereas
       higher  values  extract  the  captured  substrings.  For pcre_copy_sub-
       string(), the string is placed in buffer,  whose  length  is  given  by
       buffersize,  while  for  pcre_get_substring()  a new block of memory is
       obtained via pcre_malloc, and its address is  returned  via  stringptr.
       The  yield  of  the function is the length of the string, not including
       the terminating zero, or one of these error codes:

         PCRE_ERROR_NOMEMORY       (-6)

       The buffer was too small for pcre_copy_substring(), or the  attempt  to
       get memory failed for pcre_get_substring().

         PCRE_ERROR_NOSUBSTRING    (-7)

       There is no substring whose number is stringnumber.

       The  pcre_get_substring_list()  function  extracts  all  available sub-
       strings and builds a list of pointers to them. All this is  done  in  a
       single block of memory that is obtained via pcre_malloc. The address of
       the memory block is returned via listptr, which is also  the  start  of
       the  list  of  string pointers. The end of the list is marked by a NULL
       pointer. The yield of the function is zero if all  went  well,  or  the
       error code

         PCRE_ERROR_NOMEMORY       (-6)

       if the attempt to get the memory block failed.

       When  any of these functions encounter a substring that is unset, which
       can happen when capturing subpattern number n+1 matches  some  part  of
       the  subject, but subpattern n has not been used at all, they return an
       empty string. This can be distinguished from a genuine zero-length sub-
       string  by inspecting the appropriate offset in ovector, which is nega-
       tive for unset substrings.

       The two convenience functions pcre_free_substring() and  pcre_free_sub-
       string_list()  can  be  used  to free the memory returned by a previous
       call  of  pcre_get_substring()  or  pcre_get_substring_list(),  respec-
       tively.  They  do  nothing  more  than  call the function pointed to by
       pcre_free, which of course could be called directly from a  C  program.
       However,  PCRE is used in some situations where it is linked via a spe-
       cial  interface  to  another  programming  language  that  cannot   use
       pcre_free  directly;  it is for these cases that the functions are pro-
       vided.


EXTRACTING CAPTURED SUBSTRINGS BY NAME

       int pcre_get_stringnumber(const pcre *code,
            const char *name);

       int pcre_copy_named_substring(const pcre *code,
            const char *subject, int *ovector,
            int stringcount, const char *stringname,
            char *buffer, int buffersize);

       int pcre_get_named_substring(const pcre *code,
            const char *subject, int *ovector,
            int stringcount, const char *stringname,
            const char **stringptr);

       To extract a substring by name, you first have to find the associated num-
       ber.  For example, for this pattern

         (a+)b(?<xxx>\d+)...

       the number of the subpattern called "xxx" is 2. If the name is known to
       be unique (PCRE_DUPNAMES was not set), you can find the number from the
       name by calling pcre_get_stringnumber(). The first argument is the com-
       piled pattern, and the second is the name. The yield of the function is
       the  subpattern  number,  or PCRE_ERROR_NOSUBSTRING (-7) if there is no
       subpattern of that name.

       Given the number, you can extract the substring directly, or use one of
       the functions described in the previous section. For convenience, there
       are also two functions that do the whole job.

       Most   of   the   arguments    of    pcre_copy_named_substring()    and
       pcre_get_named_substring()  are  the  same  as  those for the similarly
       named functions that extract by number. As these are described  in  the
       previous  section,  they  are not re-described here. There are just two
       differences:

       First, instead of a substring number, a substring name is  given.  Sec-
       ond, there is an extra argument, given at the start, which is a pointer
       to the compiled pattern. This is needed in order to gain access to  the
       name-to-number translation table.

       These  functions call pcre_get_stringnumber(), and if it succeeds, they
       then call pcre_copy_substring() or pcre_get_substring(),  as  appropri-
       ate.  NOTE:  If PCRE_DUPNAMES is set and there are duplicate names, the
       behaviour may not be what you want (see the next section).

       Warning: If the pattern uses the (?| feature to set up multiple subpat-
       terns  with  the  same number, as described in the section on duplicate
       subpattern numbers in the pcrepattern page, you  cannot  use  names  to
       distinguish  the  different subpatterns, because names are not included
       in the compiled code. The matching process uses only numbers. For  this
       reason,  the  use of different names for subpatterns of the same number
       causes an error at compile time.


DUPLICATE SUBPATTERN NAMES

       int pcre_get_stringtable_entries(const pcre *code,
            const char *name, char **first, char **last);

       When a pattern is compiled with the  PCRE_DUPNAMES  option,  names  for
       subpatterns  are not required to be unique. (Duplicate names are always
       allowed for subpatterns with the same number, created by using the  (?|
       feature.  Indeed,  if  such subpatterns are named, they are required to
       use the same names.)

       Normally, patterns with duplicate names are such that in any one match,
       only  one of the named subpatterns participates. An example is shown in
       the pcrepattern documentation.

       When   duplicates   are   present,   pcre_copy_named_substring()    and
       pcre_get_named_substring()  return the first substring corresponding to
       the given name that is set. If  none  are  set,  PCRE_ERROR_NOSUBSTRING
       (-7)  is  returned;  no  data  is returned. The pcre_get_stringnumber()
       function returns one of the numbers that are associated with the  name,
       but it is not defined which it is.

       If  you want to get full details of all captured substrings for a given
       name, you must use  the  pcre_get_stringtable_entries()  function.  The
       first argument is the compiled pattern, and the second is the name. The
       third and fourth are pointers to variables which  are  updated  by  the
       function. After it has run, they point to the first and last entries in
       the name-to-number table  for  the  given  name.  The  function  itself
       returns  the  length  of  each entry, or PCRE_ERROR_NOSUBSTRING (-7) if
       there are none. The format of the table is described above in the  sec-
       tion  entitled  Information  about  a  pattern.  Given all the relevant
       entries for the name, you can extract each of their numbers, and  hence
       the captured data, if any.


FINDING ALL POSSIBLE MATCHES

       The  traditional  matching  function  uses a similar algorithm to Perl,
       which stops when it finds the first match, starting at a given point in
       the  subject.  If you want to find all possible matches, or the longest
       possible match, consider using the alternative matching  function  (see
       below)  instead.  If you cannot use the alternative function, but still
       need to find all possible matches, you can kludge it up by  making  use
       of the callout facility, which is described in the pcrecallout documen-
       tation.

       What you have to do is to insert a callout right at the end of the pat-
       tern.   When your callout function is called, extract and save the cur-
       rent matched substring. Then return  1,  which  forces  pcre_exec()  to
       backtrack  and  try other alternatives. Ultimately, when it runs out of
       matches, pcre_exec() will yield PCRE_ERROR_NOMATCH.


MATCHING A PATTERN: THE ALTERNATIVE FUNCTION

       int pcre_dfa_exec(const pcre *code, const pcre_extra *extra,
            const char *subject, int length, int startoffset,
            int options, int *ovector, int ovecsize,
            int *workspace, int wscount);

       The function pcre_dfa_exec()  is  called  to  match  a  subject  string
       against  a  compiled pattern, using a matching algorithm that scans the
       subject string just once, and does not backtrack.  This  has  different
       characteristics  to  the  normal  algorithm, and is not compatible with
       Perl. Some of the features of PCRE patterns are not  supported.  Never-
       theless,  there are times when this kind of matching can be useful. For
       a discussion of the two matching algorithms, and  a  list  of  features
       that  pcre_dfa_exec() does not support, see the pcrematching documenta-
       tion.

       The arguments for the pcre_dfa_exec() function  are  the  same  as  for
       pcre_exec(), plus two extras. The ovector argument is used in a differ-
       ent way, and this is described below. The other  common  arguments  are
       used  in  the  same way as for pcre_exec(), so their description is not
       repeated here.

       The two additional arguments provide workspace for  the  function.  The
       workspace  vector  should  contain at least 20 elements. It is used for
       keeping  track  of  multiple  paths  through  the  pattern  tree.  More
       workspace  will  be  needed for patterns and subjects where there are a
       lot of potential matches.

       Here is an example of a simple call to pcre_dfa_exec():

         int rc;
         int ovector[10];
         int wspace[20];
         rc = pcre_dfa_exec(
           re,             /* result of pcre_compile() */
           NULL,           /* we didn't study the pattern */
           "some string",  /* the subject string */
           11,             /* the length of the subject string */
           0,              /* start at offset 0 in the subject */
           0,              /* default options */
           ovector,        /* vector of integers for substring information */
           10,             /* number of elements (NOT size in bytes) */
           wspace,         /* working space vector */
           20);            /* number of elements (NOT size in bytes) */

   Option bits for pcre_dfa_exec()

       The unused bits of the options argument  for  pcre_dfa_exec()  must  be
       zero.  The  only  bits  that  may  be  set are PCRE_ANCHORED, PCRE_NEW-
       LINE_xxx,        PCRE_NOTBOL,        PCRE_NOTEOL,        PCRE_NOTEMPTY,
       PCRE_NOTEMPTY_ATSTART, PCRE_NO_UTF8_CHECK, PCRE_PARTIAL_HARD, PCRE_PAR-
       TIAL_SOFT, PCRE_DFA_SHORTEST, and PCRE_DFA_RESTART. All  but  the  last
       four  of  these  are  exactly  the  same  as  for pcre_exec(), so their
       description is not repeated here.

         PCRE_PARTIAL_HARD
         PCRE_PARTIAL_SOFT

       These have the same general effect as they do for pcre_exec(), but  the
       details  are  slightly  different.  When  PCRE_PARTIAL_HARD  is set for
       pcre_dfa_exec(), it returns PCRE_ERROR_PARTIAL if the end of  the  sub-
       ject  is  reached  and there is still at least one matching possibility
       that requires additional characters. This happens even if some complete
       matches have also been found. When PCRE_PARTIAL_SOFT is set, the return
       code PCRE_ERROR_NOMATCH is converted into PCRE_ERROR_PARTIAL if the end
       of  the  subject  is  reached, there have been no complete matches, but
       there is still at least one matching possibility. The  portion  of  the
       string  that  was inspected when the longest partial match was found is
       set as the first matching string in both cases.

         PCRE_DFA_SHORTEST

       Setting the PCRE_DFA_SHORTEST option causes the matching  algorithm  to
       stop as soon as it has found one match. Because of the way the alterna-
       tive algorithm works, this is necessarily the shortest  possible  match
       at the first possible matching point in the subject string.

         PCRE_DFA_RESTART

       When pcre_dfa_exec() returns a partial match, it is possible to call it
       again, with additional subject characters, and have  it  continue  with
       the  same match. The PCRE_DFA_RESTART option requests this action; when
       it is set, the workspace and wscount arguments must reference the  same
       vector  as  before  because data about the match so far is left in them
       after a partial match. There is more discussion of this facility in the
       pcrepartial documentation.

   Successful returns from pcre_dfa_exec()

       When  pcre_dfa_exec()  succeeds, it may have matched more than one sub-
       string in the subject. Note, however, that all the matches from one run
       of  the  function  start  at the same point in the subject. The shorter
       matches are all initial substrings of the longer matches. For  example,
       if the pattern

         <.*>

       is matched against the string

         This is <something> <something else> <something further> no more

       the three matched strings are

         <something>
         <something> <something else>
         <something> <something else> <something further>

       On  success,  the  yield of the function is a number greater than zero,
       which is the number of matched substrings.  The  substrings  themselves
       are  returned  in  ovector. Each string uses two elements; the first is
       the offset to the start, and the second is the offset to  the  end.  In
       fact,  all  the  strings  have the same start offset. (Space could have
       been saved by giving this only once, but it was decided to retain  some
       compatibility  with  the  way pcre_exec() returns data, even though the
       meaning of the strings is different.)

       The strings are returned in reverse order of length; that is, the long-
       est  matching  string is given first. If there were too many matches to
       fit into ovector, the yield of the function is zero, and the vector  is
       filled with the longest matches.

   Error returns from pcre_dfa_exec()

       The  pcre_dfa_exec()  function returns a negative number when it fails.
       Many of the errors are the same  as  for  pcre_exec(),  and  these  are
       described  above.   There are in addition the following errors that are
       specific to pcre_dfa_exec():

         PCRE_ERROR_DFA_UITEM      (-16)

       This return is given if pcre_dfa_exec() encounters an item in the  pat-
       tern  that  it  does not support, for instance, the use of \C or a back
       reference.

         PCRE_ERROR_DFA_UCOND      (-17)

       This return is given if pcre_dfa_exec()  encounters  a  condition  item
       that  uses  a back reference for the condition, or a test for recursion
       in a specific group. These are not supported.

         PCRE_ERROR_DFA_UMLIMIT    (-18)

       This return is given if pcre_dfa_exec() is called with an  extra  block
       that contains a setting of the match_limit field. This is not supported
       (it is meaningless).

         PCRE_ERROR_DFA_WSSIZE     (-19)

       This return is given if  pcre_dfa_exec()  runs  out  of  space  in  the
       workspace vector.

         PCRE_ERROR_DFA_RECURSE    (-20)

       When  a  recursive subpattern is processed, the matching function calls
       itself recursively, using private vectors for  ovector  and  workspace.
       This  error  is  given  if  the output vector is not large enough. This
       should be extremely rare, as a vector of size 1000 is used.


SEE ALSO

       pcrebuild(3),  pcrecallout(3),  pcrecpp(3),  pcrematching(3),  pcrepar-
       tial(3), pcreposix(3), pcreprecompile(3), pcresample(3), pcrestack(3).


AUTHOR

       Philip Hazel
       University Computing Service
       Cambridge CB2 3QH, England.


REVISION

       Last updated: 03 October 2009
       Copyright (c) 1997-2009 University of Cambridge.
------------------------------------------------------------------------------


PCRECALLOUT(3)                                                  PCRECALLOUT(3)


NAME
       PCRE - Perl-compatible regular expressions


PCRE CALLOUTS

       int (*pcre_callout)(pcre_callout_block *);

       PCRE provides a feature called "callout", which is a means of temporar-
       ily passing control to the caller of PCRE  in  the  middle  of  pattern
       matching.  The  caller of PCRE provides an external function by putting
       its entry point in the global variable pcre_callout. By  default,  this
       variable contains NULL, which disables all calling out.

       Within  a  regular  expression,  (?C) indicates the points at which the
       external function is to be called.  Different  callout  points  can  be
       identified  by  putting  a number less than 256 after the letter C. The
       default value is zero.  For  example,  this  pattern  has  two  callout
       points:

         (?C1)abc(?C2)def

       If  the  PCRE_AUTO_CALLOUT  option  bit  is  set when pcre_compile() or
       pcre_compile2() is called, PCRE  automatically  inserts  callouts,  all
       with  number  255,  before  each  item  in the pattern. For example, if
       PCRE_AUTO_CALLOUT is used with the pattern

         A(\d{2}|--)

       it is processed as if it were

       (?C255)A(?C255)((?C255)\d{2}(?C255)|(?C255)-(?C255)-(?C255))(?C255)

       Notice that there is a callout before and after  each  parenthesis  and
       alternation  bar.  Automatic  callouts  can  be  used  for tracking the
       progress of pattern matching. The pcretest command has an  option  that
       sets  automatic callouts; when it is used, the output indicates how the
       pattern is matched. This is useful information when you are  trying  to
       optimize the performance of a particular pattern.


MISSING CALLOUTS

       You  should  be  aware  that,  because of optimizations in the way PCRE
       matches patterns by default, callouts  sometimes  do  not  happen.  For
       example, if the pattern is

         ab(?C4)cd

       PCRE knows that any matching string must contain the letter "d". If the
       subject string is "abyz", the lack of "d" means that  matching  doesn't
       ever  start,  and  the  callout is never reached. However, with "abyd",
       though the result is still no match, the callout is obeyed.

       If the pattern is studied, PCRE knows the minimum length of a  matching
       string,  and will immediately give a "no match" return without actually
       running a match if the subject is not long enough, or,  for  unanchored
       patterns, if it has been scanned far enough.

       You  can disable these optimizations by passing the PCRE_NO_START_OPTI-
       MIZE option to pcre_exec() or  pcre_dfa_exec().  This  slows  down  the
       matching  process,  but  does  ensure that callouts such as the example
       above are obeyed.


THE CALLOUT INTERFACE

       During matching, when PCRE reaches a callout point, the external  func-
       tion  defined by pcre_callout is called (if it is set). This applies to
       both the pcre_exec() and the pcre_dfa_exec()  matching  functions.  The
       only  argument  to  the callout function is a pointer to a pcre_callout
       block. This structure contains the following fields:

         int          version;
         int          callout_number;
         int         *offset_vector;
         const char  *subject;
         int          subject_length;
         int          start_match;
         int          current_position;
         int          capture_top;
         int          capture_last;
         void        *callout_data;
         int          pattern_position;
         int          next_item_length;

       The version field is an integer containing the version  number  of  the
       block  format. The initial version was 0; the current version is 1. The
       version number will change again in future  if  additional  fields  are
       added, but the intention is never to remove any of the existing fields.

       The  callout_number  field  contains the number of the callout, as com-
       piled into the pattern (that is, the number after ?C for  manual  call-
       outs, and 255 for automatically generated callouts).

       The  offset_vector field is a pointer to the vector of offsets that was
       passed  by  the  caller  to  pcre_exec()   or   pcre_dfa_exec().   When
       pcre_exec()  is used, the contents can be inspected in order to extract
       substrings that have been matched so  far,  in  the  same  way  as  for
       extracting  substrings after a match has completed. For pcre_dfa_exec()
       this field is not useful.

       The subject and subject_length fields contain copies of the values that
       were passed to pcre_exec() or pcre_dfa_exec().

       The  start_match  field normally contains the offset within the subject
       at which the current match attempt  started.  However,  if  the  escape
       sequence  \K has been encountered, this value is changed to reflect the
       modified starting point. If the pattern is not  anchored,  the  callout
       function may be called several times from the same point in the pattern
       for different starting points in the subject.

       The current_position field contains the offset within  the  subject  of
       the current match pointer.

       When  the  pcre_exec() function is used, the capture_top field contains
       one more than the number of the highest numbered captured substring  so
       far.  If  no substrings have been captured, the value of capture_top is
       one. This is always the case when pcre_dfa_exec() is used,  because  it
       does not support captured substrings.

       The  capture_last  field  contains the number of the most recently cap-
       tured substring. If no substrings have been captured, its value is  -1.
       This is always the case when pcre_dfa_exec() is used.

       The  callout_data  field contains a value that is passed to pcre_exec()
       or pcre_dfa_exec() specifically so that it can be passed back in  call-
       outs.  It  is  passed  in the callout_data field of the pcre_extra data
       structure. If no such data was passed, the value of callout_data  in  a
       pcre_callout  block  is  NULL. There is a description of the pcre_extra
       structure in the pcreapi documentation.

       The pattern_position field is present from version 1 of the  pcre_call-
       out structure. It contains the offset to the next item to be matched in
       the pattern string.

       The next_item_length field is present from version 1 of the  pcre_call-
       out structure. It contains the length of the next item to be matched in
       the pattern string. When the callout immediately precedes  an  alterna-
       tion  bar, a closing parenthesis, or the end of the pattern, the length
       is zero. When the callout precedes an opening parenthesis,  the  length
       is that of the entire subpattern.

       The  pattern_position  and next_item_length fields are intended to help
       in distinguishing between different automatic callouts, which all  have
       the same callout number. However, they are set for all callouts.


RETURN VALUES

       The  external callout function returns an integer to PCRE. If the value
       is zero, matching proceeds as normal. If  the  value  is  greater  than
       zero,  matching  fails  at  the current point, but the testing of other
       matching possibilities goes ahead, just as if a lookahead assertion had
       failed.  If  the  value  is less than zero, the match is abandoned, and
       pcre_exec() or pcre_dfa_exec() returns the negative value.

       Negative  values  should  normally  be   chosen   from   the   set   of
       PCRE_ERROR_xxx values. In particular, PCRE_ERROR_NOMATCH forces a stan-
       dard "no  match"  failure.   The  error  number  PCRE_ERROR_CALLOUT  is
       reserved  for  use  by callout functions; it will never be used by PCRE
       itself.


AUTHOR

       Philip Hazel
       University Computing Service
       Cambridge CB2 3QH, England.


REVISION

       Last updated: 29 September 2009
       Copyright (c) 1997-2009 University of Cambridge.
------------------------------------------------------------------------------


PCRECOMPAT(3)                                                    PCRECOMPAT(3)


NAME
       PCRE - Perl-compatible regular expressions


DIFFERENCES BETWEEN PCRE AND PERL

       This  document describes the differences in the ways that PCRE and Perl
       handle regular expressions. The differences  described  here  are  with
       respect to Perl 5.10.

       1.  PCRE has only a subset of Perl's UTF-8 and Unicode support. Details
       of what it does have are given in the section on UTF-8 support  in  the
       main pcre page.

       2. PCRE does not allow repeat quantifiers on lookahead assertions. Perl
       permits them, but they do not mean what you might think.  For  example,
       (?!a){3} does not assert that the next three characters are not "a". It
       just asserts that the next character is not "a" three times.

       3. Capturing subpatterns that occur inside  negative  lookahead  asser-
       tions  are  counted,  but their entries in the offsets vector are never
       set. Perl sets its numerical variables from any such patterns that  are
       matched before the assertion fails to match something (thereby succeed-
       ing), but only if the negative lookahead assertion  contains  just  one
       branch.

       4.  Though  binary zero characters are supported in the subject string,
       they are not allowed in a pattern string because it is passed as a nor-
       mal C string, terminated by zero. The escape sequence \0 can be used in
       the pattern to represent a binary zero.

       5. The following Perl escape sequences are not supported: \l,  \u,  \L,
       \U, and \N. In fact these are implemented by Perl's general string-han-
       dling and are not part of its pattern matching engine. If any of  these
       are encountered by PCRE, an error is generated.

       6.  The Perl escape sequences \p, \P, and \X are supported only if PCRE
       is built with Unicode character property support. The  properties  that
       can  be tested with \p and \P are limited to the general category prop-
       erties such as Lu and Nd, script names such as Greek or  Han,  and  the
       derived  properties  Any  and  L&. PCRE does support the Cs (surrogate)
       property, which Perl does not; the  Perl  documentation  says  "Because
       Perl hides the need for the user to understand the internal representa-
       tion of Unicode characters, there is no need to implement the  somewhat
       messy concept of surrogates."

       7. PCRE does support the \Q...\E escape for quoting substrings. Charac-
       ters in between are treated as literals.  This  is  slightly  different
       from  Perl  in  that  $  and  @ are also handled as literals inside the
       quotes. In Perl, they cause variable interpolation (but of course  PCRE
       does not have variables). Note the following examples:

           Pattern            PCRE matches      Perl matches

           \Qabc$xyz\E        abc$xyz           abc followed by the
                                                  contents of $xyz
           \Qabc\$xyz\E       abc\$xyz          abc\$xyz
           \Qabc\E\$\Qxyz\E   abc$xyz           abc$xyz

       The  \Q...\E  sequence  is recognized both inside and outside character
       classes.

       8. Fairly obviously, PCRE does not support the (?{code}) and (??{code})
       constructions.  However,  there is support for recursive patterns. This
       is not available in Perl 5.8, but it is in Perl 5.10.  Also,  the  PCRE
       "callout"  feature allows an external function to be called during pat-
       tern matching. See the pcrecallout documentation for details.

       9. Subpatterns that are called  recursively  or  as  "subroutines"  are
       always  treated  as  atomic  groups  in  PCRE. This is like Python, but
       unlike Perl. There is a discussion of an example that explains this  in
       more  detail  in  the section on recursion differences from Perl in the
       pcrepattern page.

       10. There are some differences that are concerned with the settings  of
       captured  strings  when  part  of  a  pattern is repeated. For example,
       matching "aba" against the  pattern  /^(a(b)?)+$/  in  Perl  leaves  $2
       unset, but in PCRE it is set to "b".

       11.  PCRE  does  support  Perl  5.10's  backtracking  verbs  (*ACCEPT),
       (*FAIL), (*F), (*COMMIT), (*PRUNE), (*SKIP), and (*THEN), but  only  in
       the forms without an argument. PCRE does not support (*MARK).

       12.  PCRE's handling of duplicate subpattern numbers and duplicate sub-
       pattern names is not as general as Perl's. This is a consequence of the
       fact that PCRE works internally just with numbers, using an external ta-
       ble to translate between numbers and names. In  particular,  a  pattern
       such  as (?|(?<a>A)|(?<b>B)),  where the two capturing parentheses have
       the same number but different names, is not supported,  and  causes  an
       error  at compile time. If it were allowed, it would not be possible to
       distinguish which parentheses matched, because both names map  to  cap-
       turing subpattern number 1. To avoid this confusing situation, an error
       is given at compile time.

       13. PCRE provides some extensions to the Perl regular expression facil-
       ities.   Perl  5.10  includes new features that are not in earlier ver-
       sions of Perl, some of which (such as named parentheses) have  been  in
       PCRE for some time. This list is with respect to Perl 5.10:

       (a)  Although  lookbehind  assertions  in  PCRE must match fixed length
       strings, each alternative branch of a lookbehind assertion can match  a
       different  length  of  string.  Perl requires them all to have the same
       length.

       (b) If PCRE_DOLLAR_ENDONLY is set and PCRE_MULTILINE is not set, the  $
       meta-character matches only at the very end of the string.

       (c) If PCRE_EXTRA is set, a backslash followed by a letter with no spe-
       cial meaning is faulted. Otherwise, like Perl, the backslash is quietly
       ignored.  (Perl can be made to issue a warning.)

       (d)  If  PCRE_UNGREEDY is set, the greediness of the repetition quanti-
       fiers is inverted, that is, by default they are not greedy, but if fol-
       lowed by a question mark they are.

       (e) PCRE_ANCHORED can be used at matching time to force a pattern to be
       tried only at the first matching position in the subject string.

       (f) The PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY, PCRE_NOTEMPTY_ATSTART,
       and  PCRE_NO_AUTO_CAPTURE  options for pcre_exec() have no Perl equiva-
       lents.

       (g) The \R escape sequence can be restricted to match only CR,  LF,  or
       CRLF by the PCRE_BSR_ANYCRLF option.

       (h) The callout facility is PCRE-specific.

       (i) The partial matching facility is PCRE-specific.

       (j) Patterns compiled by PCRE can be saved and re-used at a later time,
       even on different hosts that have the other endianness.

       (k) The alternative matching function (pcre_dfa_exec())  matches  in  a
       different way and is not Perl-compatible.

       (l)  PCRE  recognizes some special sequences such as (*CR) at the start
       of a pattern that set overall options that cannot be changed within the
       pattern.


AUTHOR

       Philip Hazel
       University Computing Service
       Cambridge CB2 3QH, England.


REVISION

       Last updated: 04 October 2009
       Copyright (c) 1997-2009 University of Cambridge.
------------------------------------------------------------------------------


PCREPATTERN(3)                                                  PCREPATTERN(3)


NAME
       PCRE - Perl-compatible regular expressions


PCRE REGULAR EXPRESSION DETAILS

       The  syntax and semantics of the regular expressions that are supported
       by PCRE are described in detail below. There is a quick-reference  syn-
       tax summary in the pcresyntax page. PCRE tries to match Perl syntax and
       semantics as closely as it can. PCRE  also  supports  some  alternative
       regular  expression  syntax (which does not conflict with the Perl syn-
       tax) in order to provide some compatibility with regular expressions in
       Python, .NET, and Oniguruma.

       Perl's  regular expressions are described in its own documentation, and
       regular expressions in general are covered in a number of  books,  some
       of  which  have  copious  examples. Jeffrey Friedl's "Mastering Regular
       Expressions", published by  O'Reilly,  covers  regular  expressions  in
       great  detail.  This  description  of  PCRE's  regular  expressions  is
       intended as reference material.

       The original operation of PCRE was on strings of  one-byte  characters.
       However,  there is now also support for UTF-8 character strings. To use
       this, PCRE must be built to include UTF-8 support, and  you  must  call
       pcre_compile()  or  pcre_compile2() with the PCRE_UTF8 option. There is
       also a special sequence that can be given at the start of a pattern:

         (*UTF8)

       Starting a pattern with this sequence  is  equivalent  to  setting  the
       PCRE_UTF8  option.  This  feature  is  not Perl-compatible. How setting
       UTF-8 mode affects pattern matching  is  mentioned  in  several  places
       below.  There  is  also  a  summary of UTF-8 features in the section on
       UTF-8 support in the main pcre page.

       The remainder of this document discusses the  patterns  that  are  sup-
       ported  by  PCRE when its main matching function, pcre_exec(), is used.
       From  release  6.0,   PCRE   offers   a   second   matching   function,
       pcre_dfa_exec(),  which matches using a different algorithm that is not
       Perl-compatible. Some of the features discussed below are not available
       when  pcre_dfa_exec()  is used. The advantages and disadvantages of the
       alternative function, and how it differs from the normal function,  are
       discussed in the pcrematching page.


NEWLINE CONVENTIONS

       PCRE  supports five different conventions for indicating line breaks in
       strings: a single CR (carriage return) character, a  single  LF  (line-
       feed) character, the two-character sequence CRLF, any of the three pre-
       ceding, or any Unicode newline sequence. The pcreapi page  has  further
       discussion  about newlines, and shows how to set the newline convention
       in the options arguments for the compiling and matching functions.

       It is also possible to specify a newline convention by starting a  pat-
       tern string with one of the following five sequences:

         (*CR)        carriage return
         (*LF)        linefeed
         (*CRLF)      carriage return, followed by linefeed
         (*ANYCRLF)   any of the three above
         (*ANY)       all Unicode newline sequences

       These  override  the default and the options given to pcre_compile() or
       pcre_compile2(). For example, on a Unix system where LF is the  default
       newline sequence, the pattern

         (*CR)a.b

       changes the convention to CR. That pattern matches "a\nb" because LF is
       no longer a newline. Note that these special settings,  which  are  not
       Perl-compatible,  are  recognized  only at the very start of a pattern,
       and that they must be in upper case.  If  more  than  one  of  them  is
       present, the last one is used.

       The  newline  convention  does  not  affect what the \R escape sequence
       matches. By default, this is any Unicode  newline  sequence,  for  Perl
       compatibility.  However, this can be changed; see the description of \R
       in the section entitled "Newline sequences" below. A change of \R  set-
       ting can be combined with a change of newline convention.


CHARACTERS AND METACHARACTERS

       A  regular  expression  is  a pattern that is matched against a subject
       string from left to right. Most characters stand for  themselves  in  a
       pattern,  and  match  the corresponding characters in the subject. As a
       trivial example, the pattern

         The quick brown fox

       matches a portion of a subject string that is identical to itself. When
       caseless  matching is specified (the PCRE_CASELESS option), letters are
       matched independently of case. In UTF-8 mode, PCRE  always  understands
       the  concept  of case for characters whose values are less than 128, so
       caseless matching is always possible. For characters with  higher  val-
       ues,  the concept of case is supported if PCRE is compiled with Unicode
       property support, but not otherwise.   If  you  want  to  use  caseless
       matching  for  characters  128  and above, you must ensure that PCRE is
       compiled with Unicode property support as well as with UTF-8 support.

       The power of regular expressions comes  from  the  ability  to  include
       alternatives  and  repetitions in the pattern. These are encoded in the
       pattern by the use of metacharacters, which do not stand for themselves
       but instead are interpreted in some special way.

       There  are  two different sets of metacharacters: those that are recog-
       nized anywhere in the pattern except within square brackets, and  those
       that  are  recognized  within square brackets. Outside square brackets,
       the metacharacters are as follows:

         \      general escape character with several uses
         ^      assert start of string (or line, in multiline mode)
         $      assert end of string (or line, in multiline mode)
         .      match any character except newline (by default)
         [      start character class definition
         |      start of alternative branch
         (      start subpattern
         )      end subpattern
         ?      extends the meaning of (
                also 0 or 1 quantifier
                also quantifier minimizer
         *      0 or more quantifier
         +      1 or more quantifier
                also "possessive quantifier"
         {      start min/max quantifier

       Part of a pattern that is in square brackets  is  called  a  "character
       class". In a character class the only metacharacters are:

         \      general escape character
         ^      negate the class, but only if the first character
         -      indicates character range
         [      POSIX character class (only if followed by POSIX
                  syntax)
         ]      terminates the character class

       The following sections describe the use of each of the metacharacters.


BACKSLASH

       The backslash character has several uses. Firstly, if it is followed by
       a non-alphanumeric character, it takes away any  special  meaning  that
       character  may  have.  This  use  of  backslash  as an escape character
       applies both inside and outside character classes.

       For example, if you want to match a * character, you write  \*  in  the
       pattern.   This  escaping  action  applies whether or not the following
       character would otherwise be interpreted as a metacharacter, so  it  is
       always  safe  to  precede  a non-alphanumeric with backslash to specify
       that it stands for itself. In particular, if you want to match a  back-
       slash, you write \\.

       If  a  pattern is compiled with the PCRE_EXTENDED option, whitespace in
       the pattern (other than in a character class) and characters between  a
       # outside a character class and the next newline are ignored. An escap-
       ing backslash can be used to include a whitespace  or  #  character  as
       part of the pattern.

       If  you  want  to remove the special meaning from a sequence of charac-
       ters, you can do so by putting them between \Q and \E. This is  differ-
       ent  from  Perl  in  that  $  and  @ are handled as literals in \Q...\E
       sequences in PCRE, whereas in Perl, $ and @ cause  variable  interpola-
       tion. Note the following examples:

         Pattern            PCRE matches   Perl matches

         \Qabc$xyz\E        abc$xyz        abc followed by the
                                             contents of $xyz
         \Qabc\$xyz\E       abc\$xyz       abc\$xyz
         \Qabc\E\$\Qxyz\E   abc$xyz        abc$xyz

       The  \Q...\E  sequence  is recognized both inside and outside character
       classes.

   Non-printing characters

       A second use of backslash provides a way of encoding non-printing char-
       acters  in patterns in a visible manner. There is no restriction on the
       appearance of non-printing characters, apart from the binary zero  that
       terminates  a  pattern,  but  when  a pattern is being prepared by text
       editing, it is  often  easier  to  use  one  of  the  following  escape
       sequences than the binary character it represents:

         \a        alarm, that is, the BEL character (hex 07)
         \cx       "control-x", where x is any character
         \e        escape (hex 1B)
         \f        formfeed (hex 0C)
         \n        linefeed (hex 0A)
         \r        carriage return (hex 0D)
         \t        tab (hex 09)
         \ddd      character with octal code ddd, or back reference
         \xhh      character with hex code hh
         \x{hhh..} character with hex code hhh..

       The  precise  effect of \cx is as follows: if x is a lower case letter,
       it is converted to upper case. Then bit 6 of the character (hex 40)  is
       inverted.   Thus  \cz becomes hex 1A, but \c{ becomes hex 3B, while \c;
       becomes hex 7B.

       After \x, from zero to two hexadecimal digits are read (letters can  be
       in  upper  or  lower case). Any number of hexadecimal digits may appear
       between \x{ and }, but the value of the character  code  must  be  less
       than 256 in non-UTF-8 mode, and less than 2**31 in UTF-8 mode. That is,
       the maximum value in hexadecimal is 7FFFFFFF. Note that this is  bigger
       than the largest Unicode code point, which is 10FFFF.

       If  characters  other than hexadecimal digits appear between \x{ and },
       or if there is no terminating }, this form of escape is not recognized.
       Instead,  the  initial  \x  will  be interpreted as a basic hexadecimal
       escape, with no following digits, giving a  character  whose  value  is
       zero.

       Characters whose value is less than 256 can be defined by either of the
       two syntaxes for \x. There is no difference in the way  they  are  han-
       dled. For example, \xdc is exactly the same as \x{dc}.

       After  \0  up  to two further octal digits are read. If there are fewer
       than two digits, just  those  that  are  present  are  used.  Thus  the
       sequence \0\x\07 specifies two binary zeros followed by a BEL character
       (code value 7). Make sure you supply two digits after the initial  zero
       if the pattern character that follows is itself an octal digit.

       The handling of a backslash followed by a digit other than 0 is compli-
       cated.  Outside a character class, PCRE reads it and any following dig-
       its  as  a  decimal  number. If the number is less than 10, or if there
       have been at least that many previous capturing left parentheses in the
       expression,  the  entire  sequence  is  taken  as  a  back reference. A
       description of how this works is given later, following the  discussion
       of parenthesized subpatterns.

       Inside  a  character  class, or if the decimal number is greater than 9
       and there have not been that many capturing subpatterns, PCRE  re-reads
       up to three octal digits following the backslash, and uses them to gen-
       erate a data character. Any subsequent digits stand for themselves.  In
       non-UTF-8  mode,  the  value  of a character specified in octal must be
       less than \400. In UTF-8 mode, values up to  \777  are  permitted.  For
       example:

         \040   is another way of writing a space
         \40    is the same, provided there are fewer than 40
                   previous capturing subpatterns
         \7     is always a back reference
         \11    might be a back reference, or another way of
                   writing a tab
         \011   is always a tab
         \0113  is a tab followed by the character "3"
         \113   might be a back reference, otherwise the
                   character with octal code 113
         \377   might be a back reference, otherwise
                   the byte consisting entirely of 1 bits
         \81    is either a back reference, or a binary zero
                   followed by the two characters "8" and "1"

       Note  that  octal  values of 100 or greater must not be introduced by a
       leading zero, because no more than three octal digits are ever read.

       All the sequences that define a single character value can be used both
       inside  and  outside character classes. In addition, inside a character
       class, the sequence \b is interpreted as the backspace  character  (hex
       08),  and the sequences \R and \X are interpreted as the characters "R"
       and "X", respectively. Outside a character class, these sequences  have
       different meanings (see below).

   Absolute and relative back references

       The  sequence  \g followed by an unsigned or a negative number, option-
       ally enclosed in braces, is an absolute or relative back  reference.  A
       named back reference can be coded as \g{name}. Back references are dis-
       cussed later, following the discussion of parenthesized subpatterns.

   Absolute and relative subroutine calls

       For compatibility with Oniguruma, the non-Perl syntax \g followed by  a
       name or a number enclosed either in angle brackets or single quotes, is
       an alternative syntax for referencing a subpattern as  a  "subroutine".
       Details  are  discussed  later.   Note  that  \g{...} (Perl syntax) and
       \g<...> (Oniguruma syntax) are not synonymous. The  former  is  a  back
       reference; the latter is a subroutine call.

   Generic character types

       Another use of backslash is for specifying generic character types. The
       following are always recognized:

         \d     any decimal digit
         \D     any character that is not a decimal digit
         \h     any horizontal whitespace character
         \H     any character that is not a horizontal whitespace character
         \s     any whitespace character
         \S     any character that is not a whitespace character
         \v     any vertical whitespace character
         \V     any character that is not a vertical whitespace character
         \w     any "word" character
         \W     any "non-word" character

       Each pair of escape sequences partitions the complete set of characters
       into  two disjoint sets. Any given character matches one, and only one,
       of each pair.

       These character type sequences can appear both inside and outside char-
       acter  classes.  They each match one character of the appropriate type.
       If the current matching point is at the end of the subject string,  all
       of them fail, since there is no character to match.

       For  compatibility  with Perl, \s does not match the VT character (code
       11).  This  makes  it  different  from the POSIX "space" class. The  \s
       characters  are  HT  (9), LF (10), FF (12), CR (13), and space (32). If
       "use locale;" is included in a Perl script, \s may match the VT charac-
       ter. In PCRE, it never does.

       In  UTF-8 mode, characters with values greater than 128 never match \d,
       \s, or \w, and always match \D, \S, and \W. This is true even when Uni-
       code  character  property  support is available. These sequences retain
       their original meanings from before UTF-8 support was available, mainly
       for  efficiency  reasons. Note that this also affects \b, because it is
       defined in terms of \w and \W.

       The sequences \h, \H, \v, and \V are Perl 5.10 features. In contrast to
       the  other  sequences, these do match certain high-valued codepoints in
       UTF-8 mode.  The horizontal space characters are:

         U+0009     Horizontal tab
         U+0020     Space
         U+00A0     Non-break space
         U+1680     Ogham space mark
         U+180E     Mongolian vowel separator
         U+2000     En quad
         U+2001     Em quad
         U+2002     En space
         U+2003     Em space
         U+2004     Three-per-em space
         U+2005     Four-per-em space
         U+2006     Six-per-em space
         U+2007     Figure space
         U+2008     Punctuation space
         U+2009     Thin space
         U+200A     Hair space
         U+202F     Narrow no-break space
         U+205F     Medium mathematical space
         U+3000     Ideographic space

       The vertical space characters are:

         U+000A     Linefeed
         U+000B     Vertical tab
         U+000C     Formfeed
         U+000D     Carriage return
         U+0085     Next line
         U+2028     Line separator
         U+2029     Paragraph separator

       A "word" character is an underscore or any character less than 256 that
       is  a  letter  or  digit.  The definition of letters and digits is con-
       trolled by PCRE's low-valued character tables, and may vary if  locale-
       specific  matching is taking place (see "Locale support" in the pcreapi
       page). For example, in a French locale such  as  "fr_FR"  in  Unix-like
       systems,  or "french" in Windows, some character codes greater than 128
       are used for accented letters, and these are matched by \w. The use  of
       locales with Unicode is discouraged.

   Newline sequences

       Outside  a  character class, by default, the escape sequence \R matches
       any Unicode newline sequence. This is a Perl 5.10 feature. In non-UTF-8
       mode \R is equivalent to the following:

         (?>\r\n|\n|\x0b|\f|\r|\x85)

       This  is  an  example  of an "atomic group", details of which are given
       below.  This particular group matches either the two-character sequence
       CR  followed  by  LF,  or  one  of  the single characters LF (linefeed,
       U+000A), VT (vertical tab, U+000B), FF (formfeed, U+000C), CR (carriage
       return, U+000D), or NEL (next line, U+0085). The two-character sequence
       is treated as a single unit that cannot be split.

       In UTF-8 mode, two additional characters whose codepoints  are  greater
       than 255 are added: LS (line separator, U+2028) and PS (paragraph sepa-
       rator, U+2029).  Unicode character property support is not  needed  for
       these characters to be recognized.

       It is possible to restrict \R to match only CR, LF, or CRLF (instead of
       the complete set  of  Unicode  line  endings)  by  setting  the  option
       PCRE_BSR_ANYCRLF either at compile time or when the pattern is matched.
       (BSR is an abbreviation for "backslash R".) This can be made the default
       when  PCRE  is  built;  if this is the case, the other behaviour can be
       requested via the PCRE_BSR_UNICODE option.   It  is  also  possible  to
       specify  these  settings  by  starting a pattern string with one of the
       following sequences:

         (*BSR_ANYCRLF)   CR, LF, or CRLF only
         (*BSR_UNICODE)   any Unicode newline sequence

       These override the default and the options given to  pcre_compile()  or
       pcre_compile2(),  but  they  can  be  overridden  by  options  given to
       pcre_exec() or pcre_dfa_exec(). Note that these special settings, which
       are  not  Perl-compatible,  are  recognized only at the very start of a
       pattern, and that they must be in upper case. If more than one of  them
       is present, the last one is used. They can be combined with a change of
       newline convention, for example, a pattern can start with:

         (*ANY)(*BSR_ANYCRLF)

       Inside a character class, \R matches the letter "R".

   Unicode character properties

       When PCRE is built with Unicode character property support, three addi-
       tional  escape sequences that match characters with specific properties
       are available.  When not in UTF-8 mode, these sequences are  of  course
       limited  to  testing characters whose codepoints are less than 256, but
       they do work in this mode.  The extra escape sequences are:

         \p{xx}   a character with the xx property
         \P{xx}   a character without the xx property
         \X       an extended Unicode sequence

       The property names represented by xx above are limited to  the  Unicode
       script names, the general category properties, and "Any", which matches
       any character (including newline). Other properties such as "InMusical-
       Symbols"  are  not  currently supported by PCRE. Note that \P{Any} does
       not match any characters, so always causes a match failure.

       Sets of Unicode characters are defined as belonging to certain scripts.
       A  character from one of these sets can be matched using a script name.
       For example:

         \p{Greek}
         \P{Han}

       Those that are not part of an identified script are lumped together  as
       "Common". The current list of scripts is:

       Arabic, Armenian, Avestan, Balinese, Bamum, Bengali, Bopomofo, Braille,
       Buginese, Buhid, Canadian_Aboriginal, Carian, Cham,  Cherokee,  Common,
       Coptic,   Cuneiform,  Cypriot,  Cyrillic,  Deseret,  Devanagari,  Egyp-
       tian_Hieroglyphs,  Ethiopic,  Georgian,  Glagolitic,   Gothic,   Greek,
       Gujarati,  Gurmukhi,  Han,  Hangul,  Hanunoo,  Hebrew,  Hiragana, Impe-
       rial_Aramaic, Inherited, Inscriptional_Pahlavi, Inscriptional_Parthian,
       Javanese,  Kaithi, Kannada, Katakana, Kayah_Li, Kharoshthi, Khmer, Lao,
       Latin,  Lepcha,  Limbu,  Linear_B,  Lisu,  Lycian,  Lydian,  Malayalam,
       Meetei_Mayek,  Mongolian, Myanmar, New_Tai_Lue, Nko, Ogham, Old_Italic,
       Old_Persian, Old_South_Arabian, Old_Turkic, Ol_Chiki,  Oriya,  Osmanya,
       Phags_Pa,  Phoenician,  Rejang,  Runic, Samaritan, Saurashtra, Shavian,
       Sinhala, Sundanese, Syloti_Nagri, Syriac,  Tagalog,  Tagbanwa,  Tai_Le,
       Tai_Tham,  Tai_Viet,  Tamil,  Telugu,  Thaana, Thai, Tibetan, Tifinagh,
       Ugaritic, Vai, Yi.

       Each character has exactly one general category property, specified  by
       a two-letter abbreviation. For compatibility with Perl, negation can be
       specified by including a circumflex between the opening brace  and  the
       property name. For example, \p{^Lu} is the same as \P{Lu}.

       If only one letter is specified with \p or \P, it includes all the gen-
       eral category properties that start with that letter. In this case,  in
       the  absence of negation, the curly brackets in the escape sequence are
       optional; these two examples have the same effect:

         \p{L}
         \pL

       The following general category property codes are supported:

         C     Other
         Cc    Control
         Cf    Format
         Cn    Unassigned
         Co    Private use
         Cs    Surrogate

         L     Letter
         Ll    Lower case letter
         Lm    Modifier letter
         Lo    Other letter
         Lt    Title case letter
         Lu    Upper case letter

         M     Mark
         Mc    Spacing mark
         Me    Enclosing mark
         Mn    Non-spacing mark

         N     Number
         Nd    Decimal number
         Nl    Letter number
         No    Other number

         P     Punctuation
         Pc    Connector punctuation
         Pd    Dash punctuation
         Pe    Close punctuation
         Pf    Final punctuation
         Pi    Initial punctuation
         Po    Other punctuation
         Ps    Open punctuation

         S     Symbol
         Sc    Currency symbol
         Sk    Modifier symbol
         Sm    Mathematical symbol
         So    Other symbol

         Z     Separator
         Zl    Line separator
         Zp    Paragraph separator
         Zs    Space separator

       The special property L& is also supported: it matches a character  that
       has  the  Lu,  Ll, or Lt property, in other words, a letter that is not
       classified as a modifier or "other".

       The Cs (Surrogate) property applies only to  characters  in  the  range
       U+D800  to  U+DFFF. Such characters are not valid in UTF-8 strings (see
       RFC 3629) and so cannot be tested by PCRE, unless UTF-8 validity check-
       ing  has  been  turned off (see the discussion of PCRE_NO_UTF8_CHECK in
       the pcreapi page). Perl does not support the Cs property.

       The long synonyms for  property  names  that  Perl  supports  (such  as
       \p{Letter})  are  not  supported by PCRE, nor is it permitted to prefix
       any of these properties with "Is".

       No character that is in the Unicode table has the Cn (unassigned) prop-
       erty.  Instead, this property is assumed for any code point that is not
       in the Unicode table.

       Specifying caseless matching does not affect  these  escape  sequences.
       For example, \p{Lu} always matches only upper case letters.

       The  \X  escape  matches  any number of Unicode characters that form an
       extended Unicode sequence. \X is equivalent to

         (?>\PM\pM*)

       That is, it matches a character without the "mark"  property,  followed
       by  zero  or  more  characters with the "mark" property, and treats the
       sequence as an atomic group (see below).  Characters  with  the  "mark"
       property  are  typically  accents  that affect the preceding character.
       None of them have codepoints less than 256, so  in  non-UTF-8  mode  \X
       matches any one character.

       Matching  characters  by Unicode property is not fast, because PCRE has
       to search a structure that contains  data  for  over  fifteen  thousand
       characters. That is why the traditional escape sequences such as \d and
       \w do not use Unicode properties in PCRE.

   Resetting the match start

       The escape sequence \K, which is a Perl 5.10 feature, causes any previ-
       ously  matched  characters  not  to  be  included  in the final matched
       sequence. For example, the pattern:

         foo\Kbar

       matches "foobar", but reports that it has matched "bar".  This  feature
       is  similar  to  a lookbehind assertion (described below).  However, in
       this case, the part of the subject before the real match does not  have
       to  be of fixed length, as lookbehind assertions do. The use of \K does
       not interfere with the setting of captured  substrings.   For  example,
       when the pattern

         (foo)\Kbar

       matches "foobar", the first substring is still set to "foo".

       Perl  documents  that  the  use  of  \K  within assertions is "not well
       defined". In PCRE, \K is acted upon  when  it  occurs  inside  positive
       assertions, but is ignored in negative assertions.

   Simple assertions

       The  final use of backslash is for certain simple assertions. An asser-
       tion specifies a condition that has to be met at a particular point  in
       a  match, without consuming any characters from the subject string. The
       use of subpatterns for more complicated assertions is described  below.
       The backslashed assertions are:

         \b     matches at a word boundary
         \B     matches when not at a word boundary
         \A     matches at the start of the subject
         \Z     matches at the end of the subject
                 also matches before a newline at the end of the subject
         \z     matches only at the end of the subject
         \G     matches at the first matching position in the subject

       These  assertions may not appear in character classes (but note that \b
       has a different meaning, namely the backspace character, inside a char-
       acter class).

       A  word  boundary is a position in the subject string where the current
       character and the previous character do not both match \w or  \W  (i.e.
       one  matches  \w  and the other matches \W), or the start or end of the
       string if the first or last character matches \w, respectively. Neither
       PCRE  nor  Perl has a separate "start of word" or "end of word" metase-
       quence. However, whatever follows \b normally determines which  it  is.
       For example, the fragment \ba matches "a" at the start of a word.

       The  \A,  \Z,  and \z assertions differ from the traditional circumflex
       and dollar (described in the next section) in that they only ever match
       at  the  very start and end of the subject string, whatever options are
       set. Thus, they are independent of multiline mode. These  three  asser-
       tions are not affected by the PCRE_NOTBOL or PCRE_NOTEOL options, which
       affect only the behaviour of the circumflex and dollar  metacharacters.
       However,  if the startoffset argument of pcre_exec() is non-zero, indi-
       cating that matching is to start at a point other than the beginning of
       the  subject,  \A  can never match. The difference between \Z and \z is
       that \Z matches before a newline at the end of the string as well as at
       the very end, whereas \z matches only at the end.

       The  \G assertion is true only when the current matching position is at
       the start point of the match, as specified by the startoffset  argument
       of  pcre_exec().  It  differs  from \A when the value of startoffset is
       non-zero. By calling pcre_exec() multiple times with appropriate  argu-
       ments, you can mimic Perl's /g option, and it is in this kind of imple-
       mentation where \G can be useful.

       Note, however, that PCRE's interpretation of \G, as the  start  of  the
       current match, is subtly different from Perl's, which defines it as the
       end of the previous match. In Perl, these can  be  different  when  the
       previously  matched  string was empty. Because PCRE does just one match
       at a time, it cannot reproduce this behaviour.

       If all the alternatives of a pattern begin with \G, the  expression  is
       anchored to the starting match position, and the "anchored" flag is set
       in the compiled regular expression.


CIRCUMFLEX AND DOLLAR

       Outside a character class, in the default matching mode, the circumflex
       character  is  an  assertion  that is true only if the current matching
       point is at the start of the subject string. If the  startoffset  argu-
       ment  of  pcre_exec()  is  non-zero,  circumflex can never match if the
       PCRE_MULTILINE option is unset. Inside a  character  class,  circumflex
       has an entirely different meaning (see below).

       Circumflex  need  not be the first character of the pattern if a number
       of alternatives are involved, but it should be the first thing in  each
       alternative  in  which  it appears if the pattern is ever to match that
       branch. If all possible alternatives start with a circumflex, that  is,
       if  the  pattern  is constrained to match only at the start of the sub-
       ject, it is said to be an "anchored" pattern.  (There  are  also  other
       constructs that can cause a pattern to be anchored.)

       A  dollar  character  is  an assertion that is true only if the current
       matching point is at the end of  the  subject  string,  or  immediately
       before a newline at the end of the string (by default). Dollar need not
       be the last character of the pattern if a number  of  alternatives  are
       involved,  but  it  should  be  the last item in any branch in which it
       appears. Dollar has no special meaning in a character class.

       The meaning of dollar can be changed so that it  matches  only  at  the
       very  end  of  the string, by setting the PCRE_DOLLAR_ENDONLY option at
       compile time. This does not affect the \Z assertion.

       The meanings of the circumflex and dollar characters are changed if the
       PCRE_MULTILINE  option  is  set.  When  this  is the case, a circumflex
       matches immediately after internal newlines as well as at the start  of
       the  subject  string.  It  does not match after a newline that ends the
       string. A dollar matches before any newlines in the string, as well  as
       at  the very end, when PCRE_MULTILINE is set. When newline is specified
       as the two-character sequence CRLF, isolated CR and  LF  characters  do
       not indicate newlines.

       For  example, the pattern /^abc$/ matches the subject string "def\nabc"
       (where \n represents a newline) in multiline mode, but  not  otherwise.
       Consequently,  patterns  that  are anchored in single line mode because
       all branches start with ^ are not anchored in  multiline  mode,  and  a
       match  for  circumflex  is  possible  when  the startoffset argument of
       pcre_exec() is non-zero. The PCRE_DOLLAR_ENDONLY option is  ignored  if
       PCRE_MULTILINE is set.

       Note  that  the sequences \A, \Z, and \z can be used to match the start
       and end of the subject in both modes, and if all branches of a  pattern
       start  with  \A it is always anchored, whether or not PCRE_MULTILINE is
       set.


FULL STOP (PERIOD, DOT)

       Outside a character class, a dot in the pattern matches any one charac-
       ter  in  the subject string except (by default) a character that signi-
       fies the end of a line. In UTF-8 mode, the  matched  character  may  be
       more than one byte long.

       When  a line ending is defined as a single character, dot never matches
       that character; when the two-character sequence CRLF is used, dot  does
       not  match  CR  if  it  is immediately followed by LF, but otherwise it
       matches all characters (including isolated CRs and LFs). When any  Uni-
       code  line endings are being recognized, dot does not match CR or LF or
       any of the other line ending characters.

       The behaviour of dot with regard to newlines can  be  changed.  If  the
       PCRE_DOTALL  option  is  set,  a dot matches any one character, without
       exception. If the two-character sequence CRLF is present in the subject
       string, it takes two dots to match it.

       The  handling of dot is entirely independent of the handling of circum-
       flex and dollar, the only relationship being  that  they  both  involve
       newlines. Dot has no special meaning in a character class.


MATCHING A SINGLE BYTE

       Outside a character class, the escape sequence \C matches any one byte,
       both in and out of UTF-8 mode. Unlike a  dot,  it  always  matches  any
       line-ending  characters.  The  feature  is provided in Perl in order to
       match individual bytes in UTF-8 mode. Because it breaks up UTF-8  char-
       acters  into individual bytes, what remains in the string may be a mal-
       formed UTF-8 string. For this reason, the \C escape  sequence  is  best
       avoided.

       PCRE  does  not  allow \C to appear in lookbehind assertions (described
       below), because in UTF-8 mode this would make it impossible  to  calcu-
       late the length of the lookbehind.


SQUARE BRACKETS AND CHARACTER CLASSES

       An opening square bracket introduces a character class, terminated by a
       closing square bracket. A closing square bracket on its own is not spe-
       cial by default.  However, if the PCRE_JAVASCRIPT_COMPAT option is set,
       a lone closing square bracket causes a compile-time error. If a closing
       square  bracket  is required as a member of the class, it should be the
       first data character in the class  (after  an  initial  circumflex,  if
       present) or escaped with a backslash.

       A  character  class matches a single character in the subject. In UTF-8
       mode, the character may be more than one byte long. A matched character
       must be in the set of characters defined by the class, unless the first
       character in the class definition is a circumflex, in  which  case  the
       subject  character  must  not  be in the set defined by the class. If a
       circumflex is actually required as a member of the class, ensure it  is
       not the first character, or escape it with a backslash.

       For  example, the character class [aeiou] matches any lower case vowel,
       while [^aeiou] matches any character that is not a  lower  case  vowel.
       Note that a circumflex is just a convenient notation for specifying the
       characters that are in the class by enumerating those that are  not.  A
       class  that starts with a circumflex is not an assertion; it still con-
       sumes a character from the subject string, and therefore  it  fails  if
       the current pointer is at the end of the string.

       In  UTF-8 mode, characters with values greater than 255 can be included
       in a class as a literal string of bytes, or by using the  \x{  escaping
       mechanism.

       When  caseless  matching  is set, any letters in a class represent both
       their upper case and lower case versions, so for  example,  a  caseless
       [aeiou]  matches  "A"  as well as "a", and a caseless [^aeiou] does not
       match "A", whereas a caseful version would. In UTF-8 mode, PCRE  always
       understands  the  concept  of case for characters whose values are less
       than 128, so caseless matching is always possible. For characters  with
       higher  values,  the  concept  of case is supported if PCRE is compiled
       with Unicode property support, but not otherwise.  If you want  to  use
       caseless  matching in UTF-8 mode for characters 128 and above, you must
       ensure that PCRE is compiled with Unicode property support as  well  as
       with UTF-8 support.

       Characters  that  might  indicate  line breaks are never treated in any
       special way  when  matching  character  classes,  whatever  line-ending
       sequence  is  in  use,  and  whatever  setting  of  the PCRE_DOTALL and
       PCRE_MULTILINE options is used. A class such as [^a] always matches one
       of these characters.

       The  minus (hyphen) character can be used to specify a range of charac-
       ters in a character  class.  For  example,  [d-m]  matches  any  letter
       between  d  and  m,  inclusive.  If  a minus character is required in a
       class, it must be escaped with a backslash  or  appear  in  a  position
       where  it cannot be interpreted as indicating a range, typically as the
       first or last character in the class.

       It is not possible to have the literal character "]" as the end charac-
       ter  of a range. A pattern such as [W-]46] is interpreted as a class of
       two characters ("W" and "-") followed by a literal string "46]", so  it
       would  match  "W46]"  or  "-46]". However, if the "]" is escaped with a
       backslash it is interpreted as the end of range, so [W-\]46] is  inter-
       preted  as a class containing a range followed by two other characters.
       The octal or hexadecimal representation of "]" can also be used to  end
       a range.

       Ranges  operate in the collating sequence of character values. They can
       also  be  used  for  characters  specified  numerically,  for   example
       [\000-\037].  In UTF-8 mode, ranges can include characters whose values
       are greater than 255, for example [\x{100}-\x{2ff}].

       If a range that includes letters is used when caseless matching is set,
       it matches the letters in either case. For example, [W-c] is equivalent
       to [][\\^_`wxyzabc], matched caselessly,  and  in  non-UTF-8  mode,  if
       character  tables  for  a French locale are in use, [\xc8-\xcb] matches
       accented E characters in both cases. In UTF-8 mode, PCRE  supports  the
       concept  of  case for characters with values greater than 128 only when
       it is compiled with Unicode property support.

       The character types \d, \D, \p, \P, \s, \S, \w, and \W may also  appear
       in  a  character  class,  and add the characters that they match to the
       class. For example, [\dABCDEF] matches any hexadecimal digit. A circum-
       flex  can  conveniently  be used with the upper case character types to
       specify a more restricted set of characters  than  the  matching  lower
       case  type.  For example, the class [^\W_] matches any letter or digit,
       but not underscore.

       The only metacharacters that are recognized in  character  classes  are
       backslash,  hyphen  (only  where  it can be interpreted as specifying a
       range), circumflex (only at the start), opening  square  bracket  (only
       when  it can be interpreted as introducing a POSIX class name - see the
       next section), and the terminating  closing  square  bracket.  However,
       escaping other non-alphanumeric characters does no harm.


POSIX CHARACTER CLASSES

       Perl supports the POSIX notation for character classes. This uses names
       enclosed by [: and :] within the enclosing square brackets.  PCRE  also
       supports this notation. For example,

         [01[:alpha:]%]

       matches "0", "1", any alphabetic character, or "%". The supported class
       names are

         alnum    letters and digits
         alpha    letters
         ascii    character codes 0 - 127
         blank    space or tab only
         cntrl    control characters
         digit    decimal digits (same as \d)
         graph    printing characters, excluding space
         lower    lower case letters
         print    printing characters, including space
         punct    printing characters, excluding letters and digits
         space    white space (not quite the same as \s)
         upper    upper case letters
         word     "word" characters (same as \w)
         xdigit   hexadecimal digits

       The "space" characters are HT (9), LF (10), VT (11), FF (12), CR  (13),
       and  space  (32). Notice that this list includes the VT character (code
       11). This makes "space" different to \s, which does not include VT (for
       Perl compatibility).

       The  name  "word"  is  a Perl extension, and "blank" is a GNU extension
       from Perl 5.8. Another Perl extension is negation, which  is  indicated
       by a ^ character after the colon. For example,

         [12[:^digit:]]

       matches  "1", "2", or any non-digit. PCRE (and Perl) also recognize the
       POSIX syntax [.ch.] and [=ch=] where "ch" is a "collating element", but
       these are not supported, and an error is given if they are encountered.

       In UTF-8 mode, characters with values greater than 128 do not match any
       of the POSIX character classes.


VERTICAL BAR

       Vertical bar characters are used to separate alternative patterns.  For
       example, the pattern

         gilbert|sullivan

       matches  either "gilbert" or "sullivan". Any number of alternatives may
       appear, and an empty  alternative  is  permitted  (matching  the  empty
       string). The matching process tries each alternative in turn, from left
       to right, and the first one that succeeds is used. If the  alternatives
       are  within a subpattern (defined below), "succeeds" means matching the
       rest of the main pattern as well as the alternative in the subpattern.


INTERNAL OPTION SETTING

       The settings of the  PCRE_CASELESS,  PCRE_MULTILINE,  PCRE_DOTALL,  and
       PCRE_EXTENDED  options  (which are Perl-compatible) can be changed from
       within the pattern by  a  sequence  of  Perl  option  letters  enclosed
       between "(?" and ")".  The option letters are

         i  for PCRE_CASELESS
         m  for PCRE_MULTILINE
         s  for PCRE_DOTALL
         x  for PCRE_EXTENDED

       For example, (?im) sets caseless, multiline matching. It is also possi-
       ble to unset these options by preceding the letter with a hyphen, and a
       combined  setting and unsetting such as (?im-sx), which sets PCRE_CASE-
       LESS and PCRE_MULTILINE while unsetting PCRE_DOTALL and  PCRE_EXTENDED,
       is  also  permitted.  If  a  letter  appears  both before and after the
       hyphen, the option is unset.

       The PCRE-specific options PCRE_DUPNAMES, PCRE_UNGREEDY, and  PCRE_EXTRA
       can  be changed in the same way as the Perl-compatible options by using
       the characters J, U and X respectively.

       When one of these option changes occurs at  top  level  (that  is,  not
       inside  subpattern parentheses), the change applies to the remainder of
       the pattern that follows. If the change is placed right at the start of
       a pattern, PCRE extracts it into the global options (and it will there-
       fore show up in data extracted by the pcre_fullinfo() function).

       An option change within a subpattern (see below for  a  description  of
       subpatterns) affects only that part of the current pattern that follows
       it, so

         (a(?i)b)c

       matches abc and aBc and no other strings (assuming PCRE_CASELESS is not
       used).   By  this means, options can be made to have different settings
       in different parts of the pattern. Any changes made in one  alternative
       do  carry  on  into subsequent branches within the same subpattern. For
       example,

         (a(?i)b|c)

       matches "ab", "aB", "c", and "C", even though  when  matching  "C"  the
       first  branch  is  abandoned before the option setting. This is because
       the effects of option settings happen at compile time. There  would  be
       some very weird behaviour otherwise.

       Note:  There  are  other  PCRE-specific  options that can be set by the
       application when the compile or match functions  are  called.  In  some
       cases the pattern can contain special leading sequences such as (*CRLF)
       to override what the application has set or what  has  been  defaulted.
       Details  are  given  in the section entitled "Newline sequences" above.
       There is also the (*UTF8) leading sequence that  can  be  used  to  set
       UTF-8 mode; this is equivalent to setting the PCRE_UTF8 option.


SUBPATTERNS

       Subpatterns are delimited by parentheses (round brackets), which can be
       nested.  Turning part of a pattern into a subpattern does two things:

       1. It localizes a set of alternatives. For example, the pattern

         cat(aract|erpillar|)

       matches one of the words "cat", "cataract", or  "caterpillar".  Without
       the  parentheses,  it  would  match  "cataract", "erpillar" or an empty
       string.

       2. It sets up the subpattern as  a  capturing  subpattern.  This  means
       that,  when  the  whole  pattern  matches,  that portion of the subject
       string that matched the subpattern is passed back to the caller via the
       ovector  argument  of pcre_exec(). Opening parentheses are counted from
       left to right (starting from 1) to obtain  numbers  for  the  capturing
       subpatterns.

       For  example,  if the string "the red king" is matched against the pat-
       tern

         the ((red|white) (king|queen))

       the captured substrings are "red king", "red", and "king", and are num-
       bered 1, 2, and 3, respectively.

       The  fact  that  plain  parentheses  fulfil two functions is not always
       helpful.  There are often times when a grouping subpattern is  required
       without  a capturing requirement. If an opening parenthesis is followed
       by a question mark and a colon, the subpattern does not do any  captur-
       ing,  and  is  not  counted when computing the number of any subsequent
       capturing subpatterns. For example, if the string "the white queen"  is
       matched against the pattern

         the ((?:red|white) (king|queen))

       the captured substrings are "white queen" and "queen", and are numbered
       1 and 2. The maximum number of capturing subpatterns is 65535.

       As a convenient shorthand, if any option settings are required  at  the
       start  of  a  non-capturing  subpattern,  the option letters may appear
       between the "?" and the ":". Thus the two patterns

         (?i:saturday|sunday)
         (?:(?i)saturday|sunday)

       match exactly the same set of strings. Because alternative branches are
       tried  from  left  to right, and options are not reset until the end of
       the subpattern is reached, an option setting in one branch does  affect
       subsequent  branches,  so  the above patterns match "SUNDAY" as well as
       "Saturday".


DUPLICATE SUBPATTERN NUMBERS

       Perl 5.10 introduced a feature whereby each alternative in a subpattern
       uses  the same numbers for its capturing parentheses. Such a subpattern
       starts with (?| and is itself a non-capturing subpattern. For  example,
       consider this pattern:

         (?|(Sat)ur|(Sun))day

       Because  the two alternatives are inside a (?| group, both sets of cap-
       turing parentheses are numbered one. Thus, when  the  pattern  matches,
       you  can  look  at captured substring number one, whichever alternative
       matched. This construct is useful when you want to  capture  part,  but
       not all, of one of a number of alternatives. Inside a (?| group, paren-
       theses are numbered as usual, but the number is reset at the  start  of
       each  branch. The numbers of any capturing buffers that follow the sub-
       pattern start after the highest number used in any branch. The  follow-
       ing  example  is taken from the Perl documentation.  The numbers under-
       neath show in which buffer the captured content will be stored.

         # before  ---------------branch-reset----------- after
         / ( a )  (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
         # 1            2         2  3        2     3     4

       A back reference to a numbered subpattern uses the  most  recent  value
       that  is  set  for that number by any subpattern. The following pattern
       matches "abcabc" or "defdef":

         /(?|(abc)|(def))\1/

       In contrast, a recursive or "subroutine" call to a numbered  subpattern
       always  refers  to  the first one in the pattern with the given number.
       The following pattern matches "abcabc" or "defabc":

         /(?|(abc)|(def))(?1)/

       If a condition test for a subpattern's having matched refers to a  non-
       unique  number, the test is true if any of the subpatterns of that num-
       ber have matched.

       An alternative approach to using this "branch reset" feature is to  use
       duplicate named subpatterns, as described in the next section.


NAMED SUBPATTERNS

       Identifying  capturing  parentheses  by number is simple, but it can be
       very hard to keep track of the numbers in complicated  regular  expres-
       sions.  Furthermore,  if  an  expression  is  modified, the numbers may
       change. To help with this difficulty, PCRE supports the naming of  sub-
       patterns. This feature was not added to Perl until release 5.10. Python
       had the feature earlier, and PCRE introduced it at release  4.0,  using
       the  Python syntax. PCRE now supports both the Perl and the Python syn-
       tax. Perl allows identically numbered  subpatterns  to  have  different
       names, but PCRE does not.

       In  PCRE,  a subpattern can be named in one of three ways: (?<name>...)
       or (?'name'...) as in Perl, or (?P<name>...) as in  Python.  References
       to  capturing parentheses from other parts of the pattern, such as back
       references, recursion, and conditions, can be made by name as  well  as
       by number.

       Names  consist  of  up  to  32 alphanumeric characters and underscores.
       Named capturing parentheses are still  allocated  numbers  as  well  as
       names,  exactly as if the names were not present. The PCRE API provides
       function calls for extracting the name-to-number translation table from
       a compiled pattern. There is also a convenience function for extracting
       a captured substring by name.

       By default, a name must be unique within a pattern, but it is  possible
       to relax this constraint by setting the PCRE_DUPNAMES option at compile
       time. (Duplicate names are also always permitted for  subpatterns  with
       the  same  number, set up as described in the previous section.) Dupli-
       cate names can be useful for patterns where only one  instance  of  the
       named  parentheses  can  match. Suppose you want to match the name of a
       weekday, either as a 3-letter abbreviation or as the full name, and  in
       both cases you want to extract the abbreviation. This pattern (ignoring
       the line breaks) does the job:

         (?<DN>Mon|Fri|Sun)(?:day)?|
         (?<DN>Tue)(?:sday)?|
         (?<DN>Wed)(?:nesday)?|
         (?<DN>Thu)(?:rsday)?|
         (?<DN>Sat)(?:urday)?

       There are five capturing substrings, but only one is ever set  after  a
       match.  (An alternative way of solving this problem is to use a "branch
       reset" subpattern, as described in the previous section.)

       The convenience function for extracting the data by  name  returns  the
       substring  for  the first (and in this example, the only) subpattern of
       that name that matched. This saves searching  to  find  which  numbered
       subpattern it was.

       If  you  make  a  back  reference to a non-unique named subpattern from
       elsewhere in the pattern, the one that corresponds to the first  occur-
       rence of the name is used. In the absence of duplicate numbers (see the
       previous section) this is the one with the lowest number. If you use  a
       named  reference  in a condition test (see the section about conditions
       below), either to check whether a subpattern has matched, or  to  check
       for  recursion,  all  subpatterns with the same name are tested. If the
       condition is true for any one of them, the overall condition  is  true.
       This is the same behaviour as testing by number. For further details of
       the interfaces for handling named subpatterns, see the pcreapi documen-
       tation.

       Warning: You cannot use different names to distinguish between two sub-
       patterns with the same number because PCRE uses only the  numbers  when
       matching. For this reason, an error is given at compile time if differ-
       ent names are given to subpatterns with the same number.  However,  you
       can  give  the same name to subpatterns with the same number, even when
       PCRE_DUPNAMES is not set.


REPETITION

       Repetition is specified by quantifiers, which can  follow  any  of  the
       following items:

         a literal data character
         the dot metacharacter
         the \C escape sequence
         the \X escape sequence (in UTF-8 mode with Unicode properties)
         the \R escape sequence
         an escape such as \d that matches a single character
         a character class
         a back reference (see next section)
         a parenthesized subpattern (unless it is an assertion)
         a recursive or "subroutine" call to a subpattern

       The  general repetition quantifier specifies a minimum and maximum num-
       ber of permitted matches, by giving the two numbers in  curly  brackets
       (braces),  separated  by  a comma. The numbers must be less than 65536,
       and the first must be less than or equal to the second. For example:

         z{2,4}

       matches "zz", "zzz", or "zzzz". A closing brace on its  own  is  not  a
       special  character.  If  the second number is omitted, but the comma is
       present, there is no upper limit; if the second number  and  the  comma
       are  both omitted, the quantifier specifies an exact number of required
       matches. Thus

         [aeiou]{3,}

       matches at least 3 successive vowels, but may match many more, while

         \d{8}

       matches exactly 8 digits. An opening curly bracket that  appears  in  a
       position  where a quantifier is not allowed, or one that does not match
       the syntax of a quantifier, is taken as a literal character. For  exam-
       ple, {,6} is not a quantifier, but a literal string of four characters.

       In  UTF-8  mode,  quantifiers  apply to UTF-8 characters rather than to
       individual bytes. Thus, for example, \x{100}{2} matches two UTF-8 char-
       acters, each of which is represented by a two-byte sequence. Similarly,
       when Unicode property support is available, \X{3} matches three Unicode
       extended  sequences,  each of which may be several bytes long (and they
       may be of different lengths).

       The quantifier {0} is permitted, causing the expression to behave as if
       the previous item and the quantifier were not present. This may be use-
       ful for subpatterns that are referenced as subroutines  from  elsewhere
       in the pattern. Items other than subpatterns that have a {0} quantifier
       are omitted from the compiled pattern.

       For convenience, the three most common quantifiers have  single-charac-
       ter abbreviations:

         *    is equivalent to {0,}
         +    is equivalent to {1,}
         ?    is equivalent to {0,1}

       It  is  possible  to construct infinite loops by following a subpattern
       that can match no characters with a quantifier that has no upper limit,
       for example:

         (a?)*

       Earlier versions of Perl and PCRE used to give an error at compile time
       for such patterns. However, because there are cases where this  can  be
       useful,  such  patterns  are now accepted, but if any repetition of the
       subpattern does in fact match no characters, the loop is forcibly  bro-
       ken.

       By  default,  the quantifiers are "greedy", that is, they match as much
       as possible (up to the maximum  number  of  permitted  times),  without
       causing  the  rest of the pattern to fail. The classic example of where
       this gives problems is in trying to match comments in C programs. These
       appear  between  /*  and  */ and within the comment, individual * and /
       characters may appear. An attempt to match C comments by  applying  the
       pattern

         /\*.*\*/

       to the string

         /* first comment */  not comment  /* second comment */

       fails,  because it matches the entire string owing to the greediness of
       the .*  item.

       However, if a quantifier is followed by a question mark, it  ceases  to
       be greedy, and instead matches the minimum number of times possible, so
       the pattern

         /\*.*?\*/

       does the right thing with the C comments. The meaning  of  the  various
       quantifiers  is  not  otherwise  changed,  just the preferred number of
       matches.  Do not confuse this use of question mark with its  use  as  a
       quantifier  in its own right. Because it has two uses, it can sometimes
       appear doubled, as in

         \d??\d

       which matches one digit by preference, but can match two if that is the
       only way the rest of the pattern matches.

       If  the PCRE_UNGREEDY option is set (an option that is not available in
       Perl), the quantifiers are not greedy by default, but  individual  ones
       can  be  made  greedy  by following them with a question mark. In other
       words, it inverts the default behaviour.

       When a parenthesized subpattern is quantified  with  a  minimum  repeat
       count  that is greater than 1 or with a limited maximum, more memory is
       required for the compiled pattern, in proportion to  the  size  of  the
       minimum or maximum.

       If a pattern starts with .* or .{0,} and the PCRE_DOTALL option (equiv-
       alent to Perl's /s) is set, thus allowing the dot  to  match  newlines,
       the  pattern  is  implicitly anchored, because whatever follows will be
       tried against every character position in the subject string, so  there
       is  no  point  in  retrying the overall match at any position after the
       first. PCRE normally treats such a pattern as though it  were  preceded
       by \A.

       In  cases  where  it  is known that the subject string contains no new-
       lines, it is worth setting PCRE_DOTALL in order to  obtain  this  opti-
       mization, or alternatively using ^ to indicate anchoring explicitly.

       However,  there is one situation where the optimization cannot be used.
       When .*  is inside capturing parentheses that are the subject of a back
       reference elsewhere in the pattern, a match at the start may fail where
       a later one succeeds. Consider, for example:

         (.*)abc\1

       If the subject is "xyz123abc123" the match point is the fourth  charac-
       ter. For this reason, such a pattern is not implicitly anchored.

       When a capturing subpattern is repeated, the value captured is the sub-
       string that matched the final iteration. For example, after

         (tweedle[dume]{3}\s*)+

       has matched "tweedledum tweedledee" the value of the captured substring
       is  "tweedledee".  However,  if there are nested capturing subpatterns,
       the corresponding captured values may have been set in previous  itera-
       tions. For example, after

         /(a|(b))+/

       matches "aba" the value of the second captured substring is "b".


ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS

       With  both  maximizing ("greedy") and minimizing ("ungreedy" or "lazy")
       repetition, failure of what follows normally causes the  repeated  item
       to  be  re-evaluated to see if a different number of repeats allows the
       rest of the pattern to match. Sometimes it is useful to  prevent  this,
       either to change the nature of the match, or to cause it to fail earlier
       than it otherwise might, when the author of the pattern knows there  is
       no point in carrying on.

       Consider,  for  example, the pattern \d+foo when applied to the subject
       line

         123456bar

       After matching all 6 digits and then failing to match "foo", the normal
       action  of  the matcher is to try again with only 5 digits matching the
       \d+ item, and then with  4,  and  so  on,  before  ultimately  failing.
       "Atomic  grouping"  (a  term taken from Jeffrey Friedl's book) provides
       the means for specifying that once a subpattern has matched, it is  not
       to be re-evaluated in this way.

       If  we  use atomic grouping for the previous example, the matcher gives
       up immediately on failing to match "foo" the first time.  The  notation
       is a kind of special parenthesis, starting with (?> as in this example:

         (?>\d+)foo

       This  kind  of  parenthesis "locks up" the  part of the pattern it con-
       tains once it has matched, and a failure further into  the  pattern  is
       prevented  from  backtracking into it. Backtracking past it to previous
       items, however, works as normal.

       An alternative description is that a subpattern of  this  type  matches
       the  string  of  characters  that an identical standalone pattern would
       match, if anchored at the current point in the subject string.

       Atomic grouping subpatterns are not capturing subpatterns. Simple cases
       such as the above example can be thought of as a maximizing repeat that
       must swallow everything it can. So, while both \d+ and  \d+?  are  pre-
       pared  to  adjust  the number of digits they match in order to make the
       rest of the pattern match, (?>\d+) can only match an entire sequence of
       digits.

       Atomic  groups in general can of course contain arbitrarily complicated
       subpatterns, and can be nested. However, when  the  subpattern  for  an
       atomic group is just a single repeated item, as in the example above, a
       simpler notation, called a "possessive quantifier" can  be  used.  This
       consists  of  an  additional  + character following a quantifier. Using
       this notation, the previous example can be rewritten as

         \d++foo

       Note that a possessive quantifier can be used with an entire group, for
       example:

         (abc|xyz){2,3}+

       Possessive   quantifiers   are   always  greedy;  the  setting  of  the
       PCRE_UNGREEDY option is ignored. They are a convenient notation for the
       simpler  forms  of atomic group. However, there is no difference in the
       meaning of a possessive quantifier and  the  equivalent  atomic  group,
       though  there  may  be a performance difference; possessive quantifiers
       should be slightly faster.

       The possessive quantifier syntax is an extension to the Perl  5.8  syn-
       tax.   Jeffrey  Friedl  originated the idea (and the name) in the first
       edition of his book. Mike McCloskey liked it, so implemented it when he
       built  Sun's Java package, and PCRE copied it from there. It ultimately
       found its way into Perl at release 5.10.

       PCRE has an optimization that automatically "possessifies" certain sim-
       ple  pattern  constructs.  For  example, the sequence A+B is treated as
       A++B because there is no point in backtracking into a sequence  of  A's
       when B must follow.

       When  a  pattern  contains an unlimited repeat inside a subpattern that
       can itself be repeated an unlimited number of  times,  the  use  of  an
       atomic  group  is  the  only way to avoid some failing matches taking a
       very long time indeed. The pattern

         (\D+|<\d+>)*[!?]

       matches an unlimited number of substrings that either consist  of  non-
       digits,  or  digits  enclosed in <>, followed by either ! or ?. When it
       matches, it runs quickly. However, if it is applied to

         aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa

       it takes a long time before reporting  failure.  This  is  because  the
       string  can be divided between the internal \D+ repeat and the external
       * repeat in a large number of ways, and all  have  to  be  tried.  (The
       example  uses  [!?]  rather than a single character at the end, because
       both PCRE and Perl have an optimization that allows  for  fast  failure
       when  a single character is used. They remember the last single charac-
       ter that is required for a match, and fail early if it is  not  present
       in  the  string.)  If  the pattern is changed so that it uses an atomic
       group, like this:

         ((?>\D+)|<\d+>)*[!?]

       sequences of non-digits cannot be broken, and failure happens quickly.


BACK REFERENCES

       Outside a character class, a backslash followed by a digit greater than
       0 (and possibly further digits) is a back reference to a capturing sub-
       pattern earlier (that is, to its left) in the pattern,  provided  there
       have been that many previous capturing left parentheses.

       However, if the decimal number following the backslash is less than 10,
       it is always taken as a back reference, and causes  an  error  only  if
       there  are  not that many capturing left parentheses in the entire pat-
       tern. In other words, the parentheses that are referenced need  not  be
       to  the left of the reference for numbers less than 10. A "forward back
       reference" of this type can make sense when a  repetition  is  involved
       and  the  subpattern to the right has participated in an earlier itera-
       tion.

       It is not possible to have a numerical "forward back  reference"  to  a
       subpattern  whose  number  is  10  or  more using this syntax because a
       sequence such as \50 is interpreted as a character  defined  in  octal.
       See the subsection entitled "Non-printing characters" above for further
       details of the handling of digits following a backslash.  There  is  no
       such  problem  when named parentheses are used. A back reference to any
       subpattern is possible using named parentheses (see below).

       Another way of avoiding the ambiguity inherent in  the  use  of  digits
       following a backslash is to use the \g escape sequence, which is a fea-
       ture introduced in Perl 5.10.  This  escape  must  be  followed  by  an
       unsigned  number  or  a negative number, optionally enclosed in braces.
       These examples are all identical:

         (ring), \1
         (ring), \g1
         (ring), \g{1}

       An unsigned number specifies an absolute reference without the  ambigu-
       ity that is present in the older syntax. It is also useful when literal
       digits follow the reference. A negative number is a relative reference.
       Consider this example:

         (abc(def)ghi)\g{-1}

       The sequence \g{-1} is a reference to the most recently started captur-
       ing subpattern before \g, that is, it is equivalent to  \2.  Similarly,
       \g{-2} would be equivalent to \1. The use of relative references can be
       helpful in long patterns, and also in  patterns  that  are  created  by
       joining together fragments that contain references within themselves.

       A  back  reference matches whatever actually matched the capturing sub-
       pattern in the current subject string, rather  than  anything  matching
       the subpattern itself (see "Subpatterns as subroutines" below for a way
       of doing that). So the pattern

         (sens|respons)e and \1ibility

       matches "sense and sensibility" and "response and responsibility",  but
       not  "sense and responsibility". If caseful matching is in force at the
       time of the back reference, the case of letters is relevant. For  exam-
       ple,

         ((?i)rah)\s+\1

       matches  "rah  rah"  and  "RAH RAH", but not "RAH rah", even though the
       original capturing subpattern is matched caselessly.

       There are several different ways of writing back  references  to  named
       subpatterns.  The  .NET syntax \k{name} and the Perl syntax \k<name> or
       \k'name' are supported, as is the Python syntax (?P=name). Perl  5.10's
       unified back reference syntax, in which \g can be used for both numeric
       and named references, is also supported. We  could  rewrite  the  above
       example in any of the following ways:

         (?<p1>(?i)rah)\s+\k<p1>
         (?'p1'(?i)rah)\s+\k{p1}
         (?P<p1>(?i)rah)\s+(?P=p1)
         (?<p1>(?i)rah)\s+\g{p1}

       A  subpattern  that  is  referenced  by  name may appear in the pattern
       before or after the reference.

       There may be more than one back reference to the same subpattern. If  a
       subpattern  has  not actually been used in a particular match, any back
       references to it always fail by default. For example, the pattern

         (a|(bc))\2

       always fails if it starts to match "a" rather than  "bc".  However,  if
       the PCRE_JAVASCRIPT_COMPAT option is set at compile time, a back refer-
       ence to an unset value matches an empty string.

       Because there may be many capturing parentheses in a pattern, all  dig-
       its  following a backslash are taken as part of a potential back refer-
       ence number.  If the pattern continues with  a  digit  character,  some
       delimiter  must  be  used  to  terminate  the  back  reference.  If the
       PCRE_EXTENDED option is set, this can be whitespace. Otherwise, the \g{
       syntax or an empty comment (see "Comments" below) can be used.

   Recursive back references

       A  back reference that occurs inside the parentheses to which it refers
       fails when the subpattern is first used, so, for example,  (a\1)  never
       matches.   However,  such references can be useful inside repeated sub-
       patterns. For example, the pattern

         (a|b\1)+

       matches any number of "a"s and also "aba", "ababbaa" etc. At each iter-
       ation  of  the  subpattern,  the  back  reference matches the character
       string corresponding to the previous iteration. In order  for  this  to
       work,  the  pattern must be such that the first iteration does not need
       to match the back reference. This can be done using alternation, as  in
       the example above, or by a quantifier with a minimum of zero.

       Back  references of this type cause the group that they reference to be
       treated as an atomic group.  Once the whole group has been  matched,  a
       subsequent  matching  failure cannot cause backtracking into the middle
       of the group.


ASSERTIONS

       An assertion is a test on the characters  following  or  preceding  the
       current  matching  point that does not actually consume any characters.
       The simple assertions coded as \b, \B, \A, \G, \Z,  \z,  ^  and  $  are
       described above.

       More  complicated  assertions  are  coded as subpatterns. There are two
       kinds: those that look ahead of the current  position  in  the  subject
       string,  and  those  that  look  behind  it. An assertion subpattern is
       matched in the normal way, except that it does not  cause  the  current
       matching position to be changed.

       Assertion  subpatterns  are  not  capturing subpatterns, and may not be
       repeated, because it makes no sense to assert the  same  thing  several
       times.  If  any kind of assertion contains capturing subpatterns within
       it, these are counted for the purposes of numbering the capturing  sub-
       patterns in the whole pattern.  However, substring capturing is carried
       out only for positive assertions, because it does not  make  sense  for
       negative assertions.

   Lookahead assertions

       Lookahead assertions start with (?= for positive assertions and (?! for
       negative assertions. For example,

         \w+(?=;)

       matches a word followed by a semicolon, but does not include the  semi-
       colon in the match, and

         foo(?!bar)

       matches  any  occurrence  of  "foo" that is not followed by "bar". Note
       that the apparently similar pattern

         (?!foo)bar

       does not find an occurrence of "bar"  that  is  preceded  by  something
       other  than "foo"; it finds any occurrence of "bar" whatsoever, because
       the assertion (?!foo) is always true when the next three characters are
       "bar". A lookbehind assertion is needed to achieve the other effect.

       If you want to force a matching failure at some point in a pattern, the
       most convenient way to do it is  with  (?!)  because  an  empty  string
       always  matches, so an assertion that requires there not to be an empty
       string must always fail.   The  Perl  5.10  backtracking  control  verb
       (*FAIL) or (*F) is essentially a synonym for (?!).

   Lookbehind assertions

       Lookbehind  assertions start with (?<= for positive assertions and (?<!
       for negative assertions. For example,

         (?<!foo)bar

       does find an occurrence of "bar" that is not  preceded  by  "foo".  The
       contents  of  a  lookbehind  assertion are restricted such that all the
       strings it matches must have a fixed length. However, if there are sev-
       eral  top-level  alternatives,  they  do  not all have to have the same
       fixed length. Thus

         (?<=bullock|donkey)

       is permitted, but

         (?<!dogs?|cats?)

       causes an error at compile time. Branches that match  different  length
       strings  are permitted only at the top level of a lookbehind assertion.
       This is an extension compared with Perl (5.8 and 5.10), which  requires
       all branches to match the same length of string. An assertion such as

         (?<=ab(c|de))

       is  not  permitted,  because  its single top-level branch can match two
       different lengths, but it is acceptable to PCRE if rewritten to use two
       top-level branches:

         (?<=abc|abde)

       In some cases, the Perl 5.10 escape sequence \K (see above) can be used
       instead of  a  lookbehind  assertion  to  get  round  the  fixed-length
       restriction.

       The  implementation  of lookbehind assertions is, for each alternative,
       to temporarily move the current position back by the fixed  length  and
       then try to match. If there are insufficient characters before the cur-
       rent position, the assertion fails.

       PCRE does not allow the \C escape (which matches a single byte in UTF-8
       mode)  to appear in lookbehind assertions, because it makes it impossi-
       ble to calculate the length of the lookbehind. The \X and  \R  escapes,
       which can match different numbers of bytes, are also not permitted.

       "Subroutine"  calls  (see below) such as (?2) or (?&X) are permitted in
       lookbehinds, as long as the subpattern matches a  fixed-length  string.
       Recursion, however, is not supported.

       Possessive  quantifiers  can  be  used  in  conjunction with lookbehind
       assertions to specify efficient matching of fixed-length strings at the
       end of subject strings. Consider a simple pattern such as

         abcd$

       when  applied  to  a  long string that does not match. Because matching
       proceeds from left to right, PCRE will look for each "a" in the subject
       and  then  see  if what follows matches the rest of the pattern. If the
       pattern is specified as

         ^.*abcd$

       the initial .* matches the entire string at first, but when this  fails
       (because there is no following "a"), it backtracks to match all but the
       last character, then all but the last two characters, and so  on.  Once
       again  the search for "a" covers the entire string, from right to left,
       so we are no better off. However, if the pattern is written as

         ^.*+(?<=abcd)

       there can be no backtracking for the .*+ item; it can  match  only  the
       entire  string.  The subsequent lookbehind assertion does a single test
       on the last four characters. If it fails, the match fails  immediately.
       For  long  strings, this approach makes a significant difference to the
       processing time.

   Using multiple assertions

       Several assertions (of any sort) may occur in succession. For example,

         (?<=\d{3})(?<!999)foo

       matches "foo" preceded by three digits that are not "999". Notice  that
       each  of  the  assertions is applied independently at the same point in
       the subject string. First there is a  check  that  the  previous  three
       characters  are  all  digits,  and  then there is a check that the same
       three characters are not "999".  This pattern does not match "foo" pre-
       ceded by six characters, the first three of which are  digits  and  the
       last three of which are not "999". For example, it doesn't match
       "123abcfoo". A pattern to do that is

         (?<=\d{3}...)(?<!999)foo

       This  time  the  first assertion looks at the preceding six characters,
       checking that the first three are digits, and then the second assertion
       checks that the preceding three characters are not "999".

       Assertions can be nested in any combination. For example,

         (?<=(?<!foo)bar)baz

       matches  an occurrence of "baz" that is preceded by "bar" which in turn
       is not preceded by "foo", while

         (?<=\d{3}(?!999)...)foo

       is another pattern that matches "foo" preceded by three digits and  any
       three characters that are not "999".


CONDITIONAL SUBPATTERNS

       It  is possible to cause the matching process to obey a subpattern con-
       ditionally or to choose between two alternative subpatterns,  depending
       on  the result of an assertion, or whether a specific capturing subpat-
       tern has already been matched. The two possible  forms  of  conditional
       subpattern are:

         (?(condition)yes-pattern)
         (?(condition)yes-pattern|no-pattern)

       If  the  condition is satisfied, the yes-pattern is used; otherwise the
       no-pattern (if present) is used. If there are more  than  two  alterna-
       tives in the subpattern, a compile-time error occurs.

       There  are  four  kinds of condition: references to subpatterns, refer-
       ences to recursion, a pseudo-condition called DEFINE, and assertions.

   Checking for a used subpattern by number

       If the text between the parentheses consists of a sequence  of  digits,
       the condition is true if a capturing subpattern of that number has pre-
       viously matched. If there is more than one  capturing  subpattern  with
       the  same  number  (see  the earlier section about duplicate subpattern
       numbers), the condition is true if any of them have been set. An alter-
       native  notation is to precede the digits with a plus or minus sign. In
       this case, the subpattern number is relative rather than absolute.  The
       most  recently opened parentheses can be referenced by (?(-1), the next
       most recent by (?(-2), and so on. In looping  constructs  it  can  also
       make  sense  to  refer  to  subsequent  groups  with constructs such as
       (?(+2).

       Consider the following pattern, which  contains  non-significant  white
       space to make it more readable (assume the PCRE_EXTENDED option) and to
       divide it into three parts for ease of discussion:

         ( \( )?    [^()]+    (?(1) \) )

       The first part matches an optional opening  parenthesis,  and  if  that
       character is present, sets it as the first captured substring. The sec-
       ond part matches one or more characters that are not  parentheses.  The
       third part is a conditional subpattern that tests whether the first set
       of parentheses matched or not. If they did, that  is,  if  the  subject
       started with an opening parenthesis, the condition is true, and so  the
       yes-pattern is executed and a closing parenthesis is required.
       Otherwise, since no-pattern is not present, the subpattern matches
       nothing. In other words, this pattern matches a sequence of
       non-parentheses, optionally enclosed in parentheses.

       If  you  were  embedding  this pattern in a larger one, you could use a
       relative reference:

         ...other stuff... ( \( )?    [^()]+    (?(-1) \) ) ...

       This makes the fragment independent of the parentheses  in  the  larger
       pattern.

   Checking for a used subpattern by name

       Perl  uses  the  syntax  (?(<name>)...) or (?('name')...) to test for a
       used subpattern by name. For compatibility  with  earlier  versions  of
       PCRE,  which  had this facility before Perl, the syntax (?(name)...) is
       also recognized. However, there is a possible ambiguity with this  syn-
       tax,  because  subpattern  names  may  consist entirely of digits. PCRE
       looks first for a named subpattern; if it cannot find one and the  name
       consists  entirely  of digits, PCRE looks for a subpattern of that num-
       ber, which must be greater than zero. Using subpattern names that  con-
       sist entirely of digits is not recommended.

       Rewriting the above example to use a named subpattern gives this:

         (?<OPEN> \( )?    [^()]+    (?(<OPEN>) \) )

       If  the  name used in a condition of this kind is a duplicate, the test
       is applied to all subpatterns of the same name, and is true if any  one
       of them has matched.

   Checking for pattern recursion

       If the condition is the string (R), and there is no subpattern with the
       name R, the condition is true if a recursive call to the whole  pattern
       or any subpattern has been made. If digits or a name preceded by amper-
       sand follow the letter R, for example:

         (?(R3)...) or (?(R&name)...)

       the condition is true if the most recent recursion is into a subpattern
       whose number or name is given. This condition does not check the entire
       recursion stack. If the name used in a condition  of  this  kind  is  a
       duplicate, the test is applied to all subpatterns of the same name, and
       is true if any one of them is the most recent recursion.

       At "top level", all these recursion test  conditions  are  false.   The
       syntax for recursive patterns is described below.

   Defining subpatterns for use by reference only

       If  the  condition  is  the string (DEFINE), and there is no subpattern
       with the name DEFINE, the condition is  always  false.  In  this  case,
       there  may  be  only  one  alternative  in the subpattern. It is always
       skipped if control reaches this point  in  the  pattern;  the  idea  of
       DEFINE  is that it can be used to define "subroutines" that can be ref-
       erenced from elsewhere. (The use of "subroutines" is described  below.)
       For  example,  a pattern to match an IPv4 address could be written like
       this (ignore whitespace and line breaks):

         (?(DEFINE) (?<byte> 2[0-4]\d | 25[0-5] | 1\d\d | [1-9]?\d) )
         \b (?&byte) (\.(?&byte)){3} \b

       The first part of the pattern is a  DEFINE  group  inside  which another
       group  named "byte" is defined. This matches an individual component of
       an IPv4 address (a number less than 256). When  matching  takes  place,
       this  part  of  the pattern is skipped because DEFINE acts like a false
       condition. The rest of the pattern uses references to the  named  group
       to  match the four dot-separated components of an IPv4 address, insist-
       ing on a word boundary at each end.

   Assertion conditions

       If the condition is not in any of the above  formats,  it  must  be  an
       assertion.   This may be a positive or negative lookahead or lookbehind
       assertion. Consider  this  pattern,  again  containing  non-significant
       white space, and with the two alternatives on the second line:

         (?(?=[^a-z]*[a-z])
         \d{2}-[a-z]{3}-\d{2}  |  \d{2}-\d{2}-\d{2} )

       The  condition  is  a  positive  lookahead  assertion  that  matches an
       optional sequence of non-letters followed by a letter. In other  words,
       it  tests  for the presence of at least one letter in the subject. If a
       letter is found, the subject is matched against the first  alternative;
       otherwise  it  is  matched  against  the  second.  This pattern matches
       strings in one of the two forms dd-aaa-dd or dd-dd-dd,  where  aaa  are
       letters and dd are digits.


COMMENTS

       The  sequence (?# marks the start of a comment that continues up to the
       next closing parenthesis. Nested parentheses  are  not  permitted.  The
       characters  that make up a comment play no part in the pattern matching
       at all.

       If the PCRE_EXTENDED option is set, an unescaped # character outside  a
       character  class  introduces  a  comment  that continues to immediately
       after the next newline in the pattern.


RECURSIVE PATTERNS

       Consider the problem of matching a string in parentheses, allowing  for
       unlimited  nested  parentheses.  Without the use of recursion, the best
       that can be done is to use a pattern that  matches  up  to  some  fixed
       depth  of  nesting.  It  is not possible to handle an arbitrary nesting
       depth.

       For some time, Perl has provided a facility that allows regular expres-
       sions  to recurse (amongst other things). It does this by interpolating
       Perl code in the expression at run time, and the code can refer to  the
       expression itself. A Perl pattern using code interpolation to solve the
       parentheses problem can be created like this:

         $re = qr{\( (?: (?>[^()]+) | (?p{$re}) )* \)}x;

       The (?p{...}) item interpolates Perl code at run time, and in this case
       refers recursively to the pattern in which it appears.

       Obviously, PCRE cannot support the interpolation of Perl code. Instead,
       it supports special syntax for recursion of  the  entire  pattern,  and
       also  for  individual  subpattern  recursion. After its introduction in
       PCRE and Python, this kind of  recursion  was  subsequently  introduced
       into Perl at release 5.10.

       A  special  item  that consists of (? followed by a number greater than
       zero and a closing parenthesis is a recursive call of the subpattern of
       the  given  number, provided that it occurs inside that subpattern. (If
       not, it is a "subroutine" call, which is described  in  the  next  sec-
       tion.)  The special item (?R) or (?0) is a recursive call of the entire
       regular expression.

       This PCRE pattern solves the nested  parentheses  problem  (assume  the
       PCRE_EXTENDED option is set so that white space is ignored):

         \( ( [^()]++ | (?R) )* \)

       First  it matches an opening parenthesis. Then it matches any number of
       substrings which can either be a  sequence  of  non-parentheses,  or  a
       recursive  match  of the pattern itself (that is, a correctly parenthe-
       sized substring).  Finally there is a closing parenthesis. Note the use
       of a possessive quantifier to avoid backtracking into sequences of non-
       parentheses.

       If this were part of a larger pattern, you would not  want  to  recurse
       the entire pattern, so instead you could use this:

         ( \( ( [^()]++ | (?1) )* \) )

       We  have  put the pattern into parentheses, and caused the recursion to
       refer to them instead of the whole pattern.

       In a larger pattern,  keeping  track  of  parenthesis  numbers  can  be
       tricky.  This  is made easier by the use of relative references (a Perl
       5.10 feature).  Instead of (?1) in the  pattern  above  you  can  write
       (?-2) to refer to the second most recently opened parentheses preceding
       the recursion. In other  words,  a  negative  number  counts  capturing
       parentheses leftwards from the point at which it is encountered.

       It  is  also  possible  to refer to subsequently opened parentheses, by
       writing references such as (?+2). However, these  cannot  be  recursive
       because  the  reference  is  not inside the parentheses that are refer-
       enced. They are always "subroutine" calls, as  described  in  the  next
       section.

       An  alternative  approach is to use named parentheses instead. The Perl
       syntax for this is (?&name); PCRE's earlier syntax  (?P>name)  is  also
       supported. We could rewrite the above example as follows:

         (?<pn> \( ( [^()]++ | (?&pn) )* \) )

       If  there  is more than one subpattern with the same name, the earliest
       one is used.

       This particular example pattern that we have been looking  at  contains
       nested unlimited repeats, and so the use of a possessive quantifier for
       matching strings of non-parentheses is important when applying the pat-
       tern  to  strings  that do not match. For example, when this pattern is
       applied to

         (aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()

       it yields "no match" quickly. However, if a  possessive  quantifier  is
       not  used, the match runs for a very long time indeed because there are
       so many different ways the + and * repeats can carve  up  the  subject,
       and all have to be tested before failure can be reported.

       At  the  end  of a match, the values of capturing parentheses are those
       from the outermost level. If you want to obtain intermediate values,  a
       callout  function can be used (see below and the pcrecallout documenta-
       tion). If the pattern above is matched against

         (ab(cd)ef)

       the value for the inner capturing parentheses  (numbered  2)  is  "ef",
       which  is the last value taken on at the top level. If a capturing sub-
       pattern is not matched at the top level, its final value is unset, even
       if it is (temporarily) set at a deeper level.

       If  there are more than 15 capturing parentheses in a pattern, PCRE has
       to obtain extra memory to store data during a recursion, which it  does
       by using pcre_malloc, freeing it via pcre_free afterwards. If no memory
       can be obtained, the match fails with the PCRE_ERROR_NOMEMORY error.

       Do not confuse the (?R) item with the condition (R),  which  tests  for
       recursion.   Consider  this pattern, which matches text in angle brack-
       ets, allowing for arbitrary nesting. Only digits are allowed in  nested
       brackets  (that is, when recursing), whereas any characters are permit-
       ted at the outer level.

         < (?: (?(R) \d++  | [^<>]*+) | (?R)) * >

       In this pattern, (?(R) is the start of a conditional  subpattern,  with
       two  different  alternatives for the recursive and non-recursive cases.
       The (?R) item is the actual recursive call.

   Recursion difference from Perl

       In PCRE (like Python, but unlike Perl), a recursive subpattern call  is
       always treated as an atomic group. That is, once it has matched some of
       the subject string, it is never re-entered, even if it contains untried
       alternatives  and  there  is a subsequent matching failure. This can be
       illustrated by the following pattern, which purports to match a  palin-
       dromic  string  that contains an odd number of characters (for example,
       "a", "aba", "abcba", "abcdcba"):

         ^(.|(.)(?1)\2)$

       The idea is that it either matches a single character, or two identical
       characters  surrounding  a sub-palindrome. In Perl, this pattern works;
       in PCRE it does not if the subject is  longer  than  three  characters.
       Consider the subject string "abcba":

       At  the  top level, the first character is matched, but as it is not at
       the end of the string, the first alternative fails; the second alterna-
       tive is taken and the recursion kicks in. The recursive call to subpat-
       tern 1 successfully matches the next character ("b").  (Note  that  the
       beginning and end of line tests are not part of the recursion).

       Back  at  the top level, the next character ("c") is compared with what
       subpattern 2 matched, which was "a". This fails. Because the  recursion
       is  treated  as  an atomic group, there are now no backtracking points,
       and so the entire match fails. (Perl is able, at  this  point,  to  re-
       enter  the  recursion  and try the second alternative.) However, if the
       pattern is written with the alternatives in the other order, things are
       different:

         ^((.)(?1)\2|.)$

       This  time,  the recursing alternative is tried first, and continues to
       recurse until it runs out of characters, at which point  the  recursion
       fails.  But  this  time  we  do  have another alternative to try at the
       higher level. That is the big difference:  in  the  previous  case  the
       remaining alternative is at a deeper recursion level, which PCRE cannot
       use.

       To change the pattern so that it matches all palindromic  strings,  not
       just  those  with an odd number of characters, it is tempting to change
       the pattern to this:

         ^((.)(?1)\2|.?)$

       Again, this works in Perl, but not in PCRE, and for  the  same  reason.
       When  a  deeper  recursion has matched a single character, it cannot be
       entered again in order to match an empty string.  The  solution  is  to
       separate  the two cases, and write out the odd and even cases as alter-
       natives at the higher level:

         ^(?:((.)(?1)\2|)|((.)(?3)\4|.))

       If you want to match typical palindromic phrases, the  pattern  has  to
       ignore all non-word characters, which can be done like this:

         ^\W*+(?:((.)\W*+(?1)\W*+\2|)|((.)\W*+(?3)\W*+\4|\W*+.\W*+))\W*+$

       If run with the PCRE_CASELESS option, this pattern matches phrases such
       as "A man, a plan, a canal: Panama!" and it works well in both PCRE and
       Perl.  Note the use of the possessive quantifier *+ to avoid backtrack-
       ing into sequences of non-word characters. Without this, PCRE  takes  a
       great  deal  longer  (ten  times or more) to match typical phrases, and
       Perl takes so long that you think it has gone into a loop.

       WARNING: The palindrome-matching patterns above work only if  the  sub-
       ject  string  does not start with a palindrome that is shorter than the
       entire string.  For example, although "abcba" is correctly matched,  if
       the  subject  is "ababa", PCRE finds the palindrome "aba" at the start,
       then fails at top level because the end of the string does not  follow.
       Once  again, it cannot jump back into the recursion to try other alter-
       natives, so the entire match fails.


SUBPATTERNS AS SUBROUTINES

       If the syntax for a recursive subpattern reference (either by number or
       by  name)  is used outside the parentheses to which it refers, it oper-
       ates like a subroutine in a programming language. The "called"  subpat-
       tern may be defined before or after the reference. A numbered reference
       can be absolute or relative, as in these examples:

         (...(absolute)...)...(?2)...
         (...(relative)...)...(?-1)...
         (...(?+1)...(relative)...

       An earlier example pointed out that the pattern

         (sens|respons)e and \1ibility

       matches "sense and sensibility" and "response and responsibility",  but
       not "sense and responsibility". If instead the pattern

         (sens|respons)e and (?1)ibility

       is  used, it does match "sense and responsibility" as well as the other
       two strings. Another example is  given  in  the  discussion  of  DEFINE
       above.

       Like  recursive  subpatterns, a subroutine call is always treated as an
       atomic group. That is, once it has matched some of the subject  string,
       it  is  never  re-entered, even if it contains untried alternatives and
       there is a subsequent matching failure. Any capturing parentheses  that
       are  set  during  the  subroutine  call revert to their previous values
       afterwards.

       When a subpattern is used as a subroutine, processing options  such  as
       case-independence are fixed when the subpattern is defined. They cannot
       be changed for different calls. For example, consider this pattern:

         (abc)(?i:(?-1))

       It matches "abcabc". It does not match "abcABC" because the  change  of
       processing option does not affect the called subpattern.


ONIGURUMA SUBROUTINE SYNTAX

       For  compatibility with Oniguruma, the non-Perl syntax \g followed by a
       name or a number enclosed either in angle brackets or single quotes, is
       an  alternative  syntax  for  referencing a subpattern as a subroutine,
       possibly recursively. Here are two of the examples used above,  rewrit-
       ten using this syntax:

         (?<pn> \( ( (?>[^()]+) | \g<pn> )* \) )
         (sens|respons)e and \g'1'ibility

       PCRE  supports  an extension to Oniguruma: if a number is preceded by a
       plus or a minus sign it is taken as a relative reference. For example:

         (abc)(?i:\g<-1>)

       Note that \g{...} (Perl syntax) and \g<...> (Oniguruma syntax) are  not
       synonymous.  The former is a back reference; the latter is a subroutine
       call.


CALLOUTS

       Perl has a feature whereby using the sequence (?{...}) causes arbitrary
       Perl  code to be obeyed in the middle of matching a regular expression.
       This makes it possible, amongst other things, to extract different sub-
       strings that match the same pair of parentheses when there is a repeti-
       tion.

       PCRE provides a similar feature, but of course it cannot obey arbitrary
       Perl code. The feature is called "callout". The caller of PCRE provides
       an external function by putting its entry point in the global  variable
       pcre_callout.   By default, this variable contains NULL, which disables
       all calling out.

       Within a regular expression, (?C) indicates the  points  at  which  the
       external  function  is  to be called. If you want to identify different
       callout points, you can put a number less than 256 after the letter  C.
       The  default  value is zero.  For example, this pattern has two callout
       points:

         (?C1)abc(?C2)def

       If the PCRE_AUTO_CALLOUT flag is passed to pcre_compile(), callouts are
       automatically  installed  before each item in the pattern. They are all
       numbered 255.

       During matching, when PCRE reaches a callout point (and pcre_callout is
       set),  the  external function is called. It is provided with the number
       of the callout, the position in the pattern, and, optionally, one  item
       of  data  originally supplied by the caller of pcre_exec(). The callout
       function may cause matching to proceed, to backtrack, or to fail  alto-
       gether. A complete description of the interface to the callout function
       is given in the pcrecallout documentation.


BACKTRACKING CONTROL

       Perl 5.10 introduced a number of "Special Backtracking Control  Verbs",
       which are described in the Perl documentation as "experimental and sub-
       ject to change or removal in a future version of Perl". It goes  on  to
       say:  "Their usage in production code should be noted to avoid problems
       during upgrades." The same remarks apply to the PCRE features described
       in this section.

       Since  these  verbs  are  specifically related to backtracking, most of
       them can be  used  only  when  the  pattern  is  to  be  matched  using
       pcre_exec(), which uses a backtracking algorithm. With the exception of
       (*FAIL), which behaves like a failing negative assertion, they cause an
       error if encountered by pcre_dfa_exec().

       If any of these verbs are used in an assertion or subroutine subpattern
       (including recursive subpatterns), their effect  is  confined  to  that
       subpattern;  it  does  not extend to the surrounding pattern. Note that
       such subpatterns are processed as anchored at the point where they  are
       tested.

       The  new verbs make use of what was previously invalid syntax: an open-
       ing parenthesis followed by an asterisk. In Perl, they are generally of
       the form (*VERB:ARG) but PCRE does not support the use of arguments, so
       its general form is just (*VERB). Any number of these verbs  may  occur
       in a pattern. There are two kinds:

   Verbs that act immediately

       The following verbs act as soon as they are encountered:

          (*ACCEPT)

       This  verb causes the match to end successfully, skipping the remainder
       of the pattern. When inside a recursion, only the innermost pattern  is
       ended  immediately.  If  (*ACCEPT) is inside capturing parentheses, the
       data so far is captured. (This feature was added  to  PCRE  at  release
       8.00.) For example:

         A((?:A|B(*ACCEPT)|C)D)

       This  matches  "AB", "AAD", or "ACD"; when it matches "AB", "B" is cap-
       tured by the outer parentheses.

         (*FAIL) or (*F)

       This verb causes the match to fail, forcing backtracking to  occur.  It
       is  equivalent to (?!) but easier to read. The Perl documentation notes
       that it is probably useful only when combined  with  (?{})  or  (??{}).
       Those  are,  of course, Perl features that are not present in PCRE. The
       nearest equivalent is the callout feature, as for example in this  pat-
       tern:

         a+(?C)(*FAIL)

       A  match  with the string "aaaa" always fails, but the callout is taken
       before each backtrack happens (in this example, 10 times).

   Verbs that act after backtracking

       The following verbs do nothing when they are encountered. Matching con-
       tinues  with what follows, but if there is no subsequent match, a fail-
       ure is forced.  The verbs  differ  in  exactly  what  kind  of  failure
       occurs.

         (*COMMIT)

       This  verb  causes  the whole match to fail outright if the rest of the
       pattern does not match. Even if the pattern is unanchored,  no  further
       attempts  to  find  a match by advancing the starting point take place.
       Once (*COMMIT) has been passed, pcre_exec() is committed to  finding  a
       match at the current starting point, or not at all. For example:

         a+(*COMMIT)b

       This  matches  "xxaab" but not "aacaab". It can be thought of as a kind
       of dynamic anchor, or "I've started, so I must finish."

         (*PRUNE)

       This verb causes the match to fail at the current position if the  rest
       of the pattern does not match. If the pattern is unanchored, the normal
       "bumpalong" advance to the next starting character then happens.  Back-
       tracking  can  occur as usual to the left of (*PRUNE), or when matching
       to the right of (*PRUNE), but if there is no match to the right,  back-
       tracking  cannot  cross (*PRUNE).  In simple cases, the use of (*PRUNE)
       is just an alternative to an atomic group or possessive quantifier, but
       there  are  some uses of (*PRUNE) that cannot be expressed in any other
       way.

         (*SKIP)

       This verb is like (*PRUNE), except that if the pattern  is  unanchored,
       the  "bumpalong" advance is not to the next character, but to the posi-
       tion in the subject where (*SKIP) was  encountered.  (*SKIP)  signifies
       that  whatever  text  was  matched leading up to it cannot be part of a
       successful match. Consider:

         a+(*SKIP)b

       If the subject is "aaaac...",  after  the  first  match  attempt  fails
       (starting  at  the  first  character in the string), the starting point
       skips on to start the next attempt at "c". Note that a possessive quan-
       tifier does not have the same effect as this example; although it would
       suppress backtracking  during  the  first  match  attempt,  the  second
       attempt  would  start at the second character instead of skipping on to
       "c".

         (*THEN)

       This verb causes a skip to the next alternation if the rest of the pat-
       tern does not match. That is, it cancels pending backtracking, but only
       within the current alternation. Its name  comes  from  the  observation
       that it can be used for a pattern-based if-then-else block:

         ( COND1 (*THEN) FOO | COND2 (*THEN) BAR | COND3 (*THEN) BAZ ) ...

       If  the COND1 pattern matches, FOO is tried (and possibly further items
       after the end of the group if FOO succeeds);  on  failure  the  matcher
       skips  to  the second alternative and tries COND2, without backtracking
       into COND1. If (*THEN) is used outside  of  any  alternation,  it  acts
       exactly like (*PRUNE).


SEE ALSO

       pcreapi(3), pcrecallout(3), pcrematching(3), pcresyntax(3), pcre(3).


AUTHOR

       Philip Hazel
       University Computing Service
       Cambridge CB2 3QH, England.


REVISION

       Last updated: 06 March 2010
       Copyright (c) 1997-2010 University of Cambridge.
------------------------------------------------------------------------------


PCRESYNTAX(3)                                                    PCRESYNTAX(3)


NAME
       PCRE - Perl-compatible regular expressions


PCRE REGULAR EXPRESSION SYNTAX SUMMARY

       The  full syntax and semantics of the regular expressions that are sup-
       ported by PCRE are described in  the  pcrepattern  documentation.  This
       document contains just a quick-reference summary of the syntax.


QUOTING

         \x         where x is non-alphanumeric is a literal x
         \Q...\E    treat enclosed characters as literal


CHARACTERS

         \a         alarm, that is, the BEL character (hex 07)
         \cx        "control-x", where x is any character
         \e         escape (hex 1B)
         \f         formfeed (hex 0C)
         \n         newline (hex 0A)
         \r         carriage return (hex 0D)
         \t         tab (hex 09)
         \ddd       character with octal code ddd, or backreference
         \xhh       character with hex code hh
         \x{hhh..}  character with hex code hhh..


CHARACTER TYPES

         .          any character except newline;
                      in dotall mode, any character whatsoever
         \C         one byte, even in UTF-8 mode (best avoided)
         \d         a decimal digit
         \D         a character that is not a decimal digit
         \h         a horizontal whitespace character
         \H         a character that is not a horizontal whitespace character
         \p{xx}     a character with the xx property
         \P{xx}     a character without the xx property
         \R         a newline sequence
         \s         a whitespace character
         \S         a character that is not a whitespace character
         \v         a vertical whitespace character
         \V         a character that is not a vertical whitespace character
         \w         a "word" character
         \W         a "non-word" character
         \X         an extended Unicode sequence

       In PCRE, \d, \D, \s, \S, \w, and \W recognize only ASCII characters.


GENERAL CATEGORY PROPERTY CODES FOR \p and \P

         C          Other
         Cc         Control
         Cf         Format
         Cn         Unassigned
         Co         Private use
         Cs         Surrogate

         L          Letter
         Ll         Lower case letter
         Lm         Modifier letter
         Lo         Other letter
         Lt         Title case letter
         Lu         Upper case letter
         L&         Ll, Lu, or Lt

         M          Mark
         Mc         Spacing mark
         Me         Enclosing mark
         Mn         Non-spacing mark

         N          Number
         Nd         Decimal number
         Nl         Letter number
         No         Other number

         P          Punctuation
         Pc         Connector punctuation
         Pd         Dash punctuation
         Pe         Close punctuation
         Pf         Final punctuation
         Pi         Initial punctuation
         Po         Other punctuation
         Ps         Open punctuation

         S          Symbol
         Sc         Currency symbol
         Sk         Modifier symbol
         Sm         Mathematical symbol
         So         Other symbol

         Z          Separator
         Zl         Line separator
         Zp         Paragraph separator
         Zs         Space separator


SCRIPT NAMES FOR \p AND \P

       Arabic, Armenian, Avestan, Balinese, Bamum, Bengali, Bopomofo, Braille,
       Buginese, Buhid, Canadian_Aboriginal, Carian, Cham,  Cherokee,  Common,
       Coptic,   Cuneiform,  Cypriot,  Cyrillic,  Deseret,  Devanagari,  Egyp-
       tian_Hieroglyphs,  Ethiopic,  Georgian,  Glagolitic,   Gothic,   Greek,
       Gujarati,  Gurmukhi,  Han,  Hangul,  Hanunoo,  Hebrew,  Hiragana, Impe-
       rial_Aramaic, Inherited, Inscriptional_Pahlavi, Inscriptional_Parthian,
       Javanese,  Kaithi, Kannada, Katakana, Kayah_Li, Kharoshthi, Khmer, Lao,
       Latin,  Lepcha,  Limbu,  Linear_B,  Lisu,  Lycian,  Lydian,  Malayalam,
       Meetei_Mayek,  Mongolian, Myanmar, New_Tai_Lue, Nko, Ogham, Old_Italic,
       Old_Persian, Old_South_Arabian, Old_Turkic, Ol_Chiki,  Oriya,  Osmanya,
       Phags_Pa,  Phoenician,  Rejang,  Runic, Samaritan, Saurashtra, Shavian,
       Sinhala, Sundanese, Syloti_Nagri, Syriac,  Tagalog,  Tagbanwa,  Tai_Le,
       Tai_Tham,  Tai_Viet,  Tamil,  Telugu,  Thaana, Thai, Tibetan, Tifinagh,
       Ugaritic, Vai, Yi.


CHARACTER CLASSES

         [...]       positive character class
         [^...]      negative character class
         [x-y]       range (can be used for hex characters)
         [[:xxx:]]   positive POSIX named set
         [[:^xxx:]]  negative POSIX named set

         alnum       alphanumeric
         alpha       alphabetic
         ascii       0-127
         blank       space or tab
         cntrl       control character
         digit       decimal digit
         graph       printing, excluding space
         lower       lower case letter
         print       printing, including space
         punct       printing, excluding alphanumeric
         space       whitespace
         upper       upper case letter
         word        same as \w
         xdigit      hexadecimal digit

       In PCRE, POSIX character set names recognize only ASCII characters. You
       can use \Q...\E inside a character class.


QUANTIFIERS

         ?           0 or 1, greedy
         ?+          0 or 1, possessive
         ??          0 or 1, lazy
         *           0 or more, greedy
         *+          0 or more, possessive
         *?          0 or more, lazy
         +           1 or more, greedy
         ++          1 or more, possessive
         +?          1 or more, lazy
         {n}         exactly n
         {n,m}       at least n, no more than m, greedy
         {n,m}+      at least n, no more than m, possessive
         {n,m}?      at least n, no more than m, lazy
         {n,}        n or more, greedy
         {n,}+       n or more, possessive
         {n,}?       n or more, lazy


ANCHORS AND SIMPLE ASSERTIONS

         \b          word boundary (only ASCII letters recognized)
         \B          not a word boundary
         ^           start of subject
                      also after internal newline in multiline mode
         \A          start of subject
         $           end of subject
                      also before newline at end of subject
                      also before internal newline in multiline mode
         \Z          end of subject
                      also before newline at end of subject
         \z          end of subject
         \G          first matching position in subject


MATCH POINT RESET

         \K          reset start of match


ALTERNATION

         expr|expr|expr...


CAPTURING

         (...)           capturing group
         (?<name>...)    named capturing group (Perl)
         (?'name'...)    named capturing group (Perl)
         (?P<name>...)   named capturing group (Python)
         (?:...)         non-capturing group
         (?|...)         non-capturing group; reset group numbers for
                          capturing groups in each alternative


ATOMIC GROUPS

         (?>...)         atomic, non-capturing group


COMMENT

         (?#....)        comment (not nestable)


OPTION SETTING

         (?i)            caseless
         (?J)            allow duplicate names
         (?m)            multiline
         (?s)            single line (dotall)
         (?U)            default ungreedy (lazy)
         (?x)            extended (ignore white space)
         (?-...)         unset option(s)

       The following is recognized only at the start of a pattern or after one
       of the newline-setting options with similar syntax:

         (*UTF8)         set UTF-8 mode


LOOKAHEAD AND LOOKBEHIND ASSERTIONS

         (?=...)         positive look ahead
         (?!...)         negative look ahead
         (?<=...)        positive look behind
         (?<!...)        negative look behind

       Each top-level branch of a look behind must be of a fixed length.


BACKREFERENCES

         \n              reference by number (can be ambiguous)
         \gn             reference by number
         \g{n}           reference by number
         \g{-n}          relative reference by number
         \k<name>        reference by name (Perl)
         \k'name'        reference by name (Perl)
         \g{name}        reference by name (Perl)
         \k{name}        reference by name (.NET)
         (?P=name)       reference by name (Python)


SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)

         (?R)            recurse whole pattern
         (?n)            call subpattern by absolute number
         (?+n)           call subpattern by relative number
         (?-n)           call subpattern by relative number
         (?&name)        call subpattern by name (Perl)
         (?P>name)       call subpattern by name (Python)
         \g<name>        call subpattern by name (Oniguruma)
         \g'name'        call subpattern by name (Oniguruma)
         \g<n>           call subpattern by absolute number (Oniguruma)
         \g'n'           call subpattern by absolute number (Oniguruma)
         \g<+n>          call subpattern by relative number (PCRE extension)
         \g'+n'          call subpattern by relative number (PCRE extension)
         \g<-n>          call subpattern by relative number (PCRE extension)
         \g'-n'          call subpattern by relative number (PCRE extension)


CONDITIONAL PATTERNS

         (?(condition)yes-pattern)
         (?(condition)yes-pattern|no-pattern)

         (?(n)...        absolute reference condition
         (?(+n)...       relative reference condition
         (?(-n)...       relative reference condition
         (?(<name>)...   named reference condition (Perl)
         (?('name')...   named reference condition (Perl)
         (?(name)...     named reference condition (PCRE)
         (?(R)...        overall recursion condition
         (?(Rn)...       specific group recursion condition
         (?(R&name)...   specific recursion condition
         (?(DEFINE)...   define subpattern for reference
         (?(assert)...   assertion condition


BACKTRACKING CONTROL

       The following act immediately they are reached:

         (*ACCEPT)       force successful match
         (*FAIL)         force backtrack; synonym (*F)

       The following act only when a subsequent match failure causes  a  back-
       track to reach them. They all force a match failure, but they differ in
       what happens afterwards. Those that advance the start-of-match point do
       so only if the pattern is not anchored.

         (*COMMIT)       overall failure, no advance of starting point
         (*PRUNE)        advance to next starting character
         (*SKIP)         advance start to current matching position
         (*THEN)         local failure, backtrack to next alternation


NEWLINE CONVENTIONS

       These  are  recognized only at the very start of the pattern or after a
       (*BSR_...) or (*UTF8) option.

         (*CR)           carriage return only
         (*LF)           linefeed only
         (*CRLF)         carriage return followed by linefeed
         (*ANYCRLF)      all three of the above
         (*ANY)          any Unicode newline sequence


WHAT \R MATCHES

       These are recognized only at the very start of the pattern or  after  a
       (*...) option that sets the newline convention or UTF-8 mode.

         (*BSR_ANYCRLF)  CR, LF, or CRLF
         (*BSR_UNICODE)  any Unicode newline sequence


CALLOUTS

         (?C)      callout
         (?Cn)     callout with data n


SEE ALSO

       pcrepattern(3), pcreapi(3), pcrecallout(3), pcrematching(3), pcre(3).


AUTHOR

       Philip Hazel
       University Computing Service
       Cambridge CB2 3QH, England.


REVISION

       Last updated: 01 March 2010
       Copyright (c) 1997-2010 University of Cambridge.
------------------------------------------------------------------------------


PCREPARTIAL(3)                                                  PCREPARTIAL(3)


NAME
       PCRE - Perl-compatible regular expressions


PARTIAL MATCHING IN PCRE

       In  normal  use  of  PCRE,  if  the  subject  string  that is passed to
       pcre_exec() or pcre_dfa_exec() matches as far as it goes,  but  is  too
       short  to  match  the  entire  pattern, PCRE_ERROR_NOMATCH is returned.
       There are circumstances where it might be helpful to  distinguish  this
       case from other cases in which there is no match.

       Consider, for example, an application where a human is required to type
       in data for a field with specific formatting requirements.  An  example
       might be a date in the form ddmmmyy, defined by this pattern:

         ^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$

       If the application sees the user's keystrokes one by one, and can check
       that what has been typed so far is potentially valid,  it  is  able  to
       raise  an  error  as  soon  as  a  mistake  is made, by beeping and not
       reflecting the character that has been typed, for example. This immedi-
       ate  feedback is likely to be a better user interface than a check that
       is delayed until the entire string has been entered.  Partial  matching
       can  also  sometimes be useful when the subject string is very long and
       is not all available at once.

       PCRE supports partial matching by means of  the  PCRE_PARTIAL_SOFT  and
       PCRE_PARTIAL_HARD options, which can be set when calling pcre_exec() or
       pcre_dfa_exec(). For backwards compatibility, PCRE_PARTIAL is a synonym
       for PCRE_PARTIAL_SOFT. The essential difference between the two options
       is whether or not a partial match is preferred to an  alternative  com-
       plete  match,  though the details differ between the two matching func-
       tions. If both options are set, PCRE_PARTIAL_HARD takes precedence.

       Setting a partial matching option disables two of PCRE's optimizations.
       PCRE  remembers the last literal byte in a pattern, and abandons match-
       ing immediately if such a byte is not present in  the  subject  string.
       This  optimization cannot be used for a subject string that might match
       only partially. If the pattern was  studied,  PCRE  knows  the  minimum
       length  of  a  matching string, and does not bother to run the matching
       function on shorter strings. This optimization  is  also  disabled  for
       partial matching.


PARTIAL MATCHING USING pcre_exec()

       A partial match occurs during a call to pcre_exec() whenever the end of
       the subject string is reached successfully, but  matching  cannot  con-
       tinue because more characters are needed. However, at least one charac-
       ter must have been matched. (In other words, a partial match can  never
       be an empty string.)

       If  PCRE_PARTIAL_SOFT  is  set,  the  partial  match is remembered, but
       matching continues as normal, and other alternatives in the pattern are
       tried.   If  no  complete  match  can  be  found,  pcre_exec()  returns
       PCRE_ERROR_PARTIAL instead of PCRE_ERROR_NOMATCH. If there are at least
       two slots in the offsets vector, the first of them is set to the offset
       of the earliest character that was inspected when the partial match was
       found.  For  convenience,  the  second  offset points to the end of the
       string so that a substring can easily be identified.

       For the majority of patterns, the first offset identifies the start  of
       the  partially matched string. However, for patterns that contain look-
       behind assertions, or \K, or begin with \b or  \B,  earlier  characters
       have been inspected while carrying out the match. For example:

         /(?<=abc)123/

       This pattern matches "123", but only if it is preceded by "abc". If the
       subject string is "xyzabc12", the offsets after a partial match are for
       the  substring  "abc12",  because  all  these  characters are needed if
       another match is tried with extra characters added.

       If there is more than one partial match, the first one that  was  found
       provides the data that is returned. Consider this pattern:

         /123\w+X|dogY/

       If  this is matched against the subject string "abc123dog", both alter-
       natives fail to match, but the end of the  subject  is  reached  during
       matching,    so    PCRE_ERROR_PARTIAL    is    returned    instead   of
       PCRE_ERROR_NOMATCH. The  offsets  are  set  to  3  and  9,  identifying
       "123dog"  as  the first partial match that was found. (In this example,
       there are two partial matches,  because  "dog"  on  its  own  partially
       matches the second alternative.)

       If PCRE_PARTIAL_HARD is set for pcre_exec(), it returns PCRE_ERROR_PAR-
       TIAL as soon as a partial match is found, without continuing to  search
       for  possible  complete matches. The difference between the two options
       can be illustrated by a pattern such as:

         /dog(sbody)?/

       This matches either "dog" or "dogsbody", greedily (that is, it  prefers
       the  longer  string  if  possible). If it is matched against the string
       "dog" with PCRE_PARTIAL_SOFT, it yields a  complete  match  for  "dog".
       However, if PCRE_PARTIAL_HARD is set, the result is PCRE_ERROR_PARTIAL.
       On the other hand, if the pattern is made ungreedy the result  is  dif-
       ferent:

         /dog(sbody)??/

       In  this case the result is always a complete match because pcre_exec()
       finds that first, and it never continues  after  finding  a  match.  It
       might  be easier to follow this explanation by thinking of the two pat-
       terns like this:

         /dog(sbody)?/    is the same as  /dogsbody|dog/
         /dog(sbody)??/   is the same as  /dog|dogsbody/

       The second pattern will never  match  "dogsbody"  when  pcre_exec()  is
       used, because it will always find the shorter match first.


PARTIAL MATCHING USING pcre_dfa_exec()

       The  pcre_dfa_exec()  function moves along the subject string character
       by character, without backtracking, searching for all possible  matches
       simultaneously.  If the end of the subject is reached before the end of
       the pattern, there is the possibility of a partial  match,  again  pro-
       vided that at least one character has matched.

       When  PCRE_PARTIAL_SOFT  is set, PCRE_ERROR_PARTIAL is returned only if
       there have been no complete matches. Otherwise,  the  complete  matches
       are  returned.   However,  if PCRE_PARTIAL_HARD is set, a partial match
       takes precedence over any complete matches. The portion of  the  string
       that  was  inspected when the longest partial match was found is set as
       the first matching string, provided there are at least two slots in the
       offsets vector.

       Because  pcre_dfa_exec()  always searches for all possible matches, and
       there is no difference between greedy and ungreedy repetition, its  be-
       haviour  is  different  from  pcre_exec() when PCRE_PARTIAL_HARD is set.
       Consider the string "dog" matched against the ungreedy  pattern  shown
       above:

         /dog(sbody)??/

       Whereas  pcre_exec()  stops  as soon as it finds the complete match for
       "dog", pcre_dfa_exec() also finds the partial match for "dogsbody", and
       so returns that when PCRE_PARTIAL_HARD is set.


PARTIAL MATCHING AND WORD BOUNDARIES

       If a pattern ends with one of the sequences \b or \B,  which  test  for
       word  boundaries,  partial  matching  with  PCRE_PARTIAL_SOFT can  give
       counter-intuitive results. Consider this pattern:

         /\bcat\b/

       This matches "cat", provided there is a word boundary at either end. If
       the subject string is "the cat", the comparison of the final "t" with a
       following  character  cannot  take  place, so a partial match is found.
       However, pcre_exec() carries on with normal matching, which matches  \b
       at  the  end  of  the subject when the last character is a letter, thus
       finding a complete match. The result, therefore, is not PCRE_ERROR_PAR-
       TIAL.  The  same  thing  happens  with pcre_dfa_exec(), because it also
       finds the complete match.

       Using PCRE_PARTIAL_HARD in this  case  does  yield  PCRE_ERROR_PARTIAL,
       because then the partial match takes precedence.


FORMERLY RESTRICTED PATTERNS

       For releases of PCRE prior to 8.00, because of the way certain internal
       optimizations  were  implemented  in  the  pcre_exec()  function,   the
       PCRE_PARTIAL  option  (predecessor  of  PCRE_PARTIAL_SOFT) could not be
       used with all patterns. From release 8.00 onwards, the restrictions  no
       longer  apply,  and  partial matching with pcre_exec() can be requested
       for any pattern.

       Items that were formerly restricted were repeated single characters and
       repeated  metasequences. If PCRE_PARTIAL was set for a pattern that did
       not conform to the restrictions, pcre_exec() returned  the  error  code
       PCRE_ERROR_BADPARTIAL  (-13).  This error code is no longer in use. The
       PCRE_INFO_OKPARTIAL call to pcre_fullinfo() to find out if  a  compiled
       pattern can be used for partial matching now always returns 1.


EXAMPLE OF PARTIAL MATCHING USING PCRETEST

       If  the  escape  sequence  \P  is  present in a pcretest data line, the
       PCRE_PARTIAL_SOFT option is used for  the  match.  Here  is  a  run  of
       pcretest that uses the date example quoted above:

           re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
         data> 25jun04\P
          0: 25jun04
          1: jun
         data> 25dec3\P
         Partial match: 25dec3
         data> 3ju\P
         Partial match: 3ju
         data> 3juj\P
         No match
         data> j\P
         No match

       The  first  data  string  is  matched completely, so pcretest shows the
       matched substrings. The remaining four strings do not  match  the  com-
       plete pattern, but the first two are partial matches. Similar output is
       obtained when pcre_dfa_exec() is used.

       If the escape sequence \P is present more than once in a pcretest  data
       line, the PCRE_PARTIAL_HARD option is set for the match.


MULTI-SEGMENT MATCHING WITH pcre_dfa_exec()

       When a partial match has been found using pcre_dfa_exec(), it is possi-
       ble to continue the match by  providing  additional  subject  data  and
       calling  pcre_dfa_exec()  again  with the same compiled regular expres-
       sion, this time setting the PCRE_DFA_RESTART option. You must pass  the
       same working space as before, because this is where details of the pre-
       vious partial match are stored. Here  is  an  example  using  pcretest,
       using  the  \R  escape  sequence to set the PCRE_DFA_RESTART option (\D
       specifies the use of pcre_dfa_exec()):

           re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
         data> 23ja\P\D
         Partial match: 23ja
         data> n05\R\D
          0: n05

       The first call has "23ja" as the subject, and requests  partial  match-
       ing;  the  second  call  has  "n05"  as  the  subject for the continued
       (restarted) match.  Notice that when the match is  complete,  only  the
       last  part  is  shown;  PCRE  does not retain the previously partially-
       matched string. It is up to the calling program to do that if it  needs
       to.

       You  can  set  the  PCRE_PARTIAL_SOFT or PCRE_PARTIAL_HARD options with
       PCRE_DFA_RESTART to continue partial matching over  multiple  segments.
       This  facility  can  be  used  to  pass  very  long  subject strings to
       pcre_dfa_exec().


MULTI-SEGMENT MATCHING WITH pcre_exec()

       From release 8.00, pcre_exec() can also be  used  to  do  multi-segment
       matching.  Unlike  pcre_dfa_exec(),  it  is not possible to restart the
       previous match with a new segment of data. Instead, new  data  must  be
       added  to  the  previous  subject  string, and the entire match re-run,
       starting from the point where the partial match occurred. Earlier  data
       can be discarded.  Consider an unanchored pattern that matches dates:

           re> /\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d/
         data> The date is 23ja\P
         Partial match: 23ja

       At  this stage, an application could discard the text preceding "23ja",
       add on text from the next segment, and call pcre_exec()  again.  Unlike
       pcre_dfa_exec(),  the  entire matching string must always be available,
       and the complete matching process occurs for each call, so more  memory
       and more processing time are needed.

       Note:  If  the pattern contains lookbehind assertions, or \K, or starts
       with \b or \B, the string that is returned for  a  partial  match  will
       include  characters  that  precede the partially matched string itself,
       because these must be retained when adding on  more  characters  for  a
       subsequent matching attempt.


ISSUES WITH MULTI-SEGMENT MATCHING

       Certain types of pattern may give problems with multi-segment matching,
       whichever matching function is used.

       1. If the pattern contains tests for the beginning or end  of  a  line,
       you  need  to pass the PCRE_NOTBOL or PCRE_NOTEOL options, as appropri-
       ate, when the subject string for any call does not contain  the  begin-
       ning or end of a line.

       2.  Lookbehind  assertions at the start of a pattern are catered for in
       the offsets that are returned for a partial match. However, in  theory,
       a  lookbehind assertion later in the pattern could require even earlier
       characters to be inspected, and it might not have been reached  when  a
       partial  match occurs. This is probably an extremely unlikely case; you
       could guard against it to a certain extent by  always  including  extra
       characters at the start.

       3.  Matching  a subject string that is split into multiple segments may
       not always produce exactly the same result as matching over one  single
       long  string,  especially  when  PCRE_PARTIAL_SOFT is used. The section
       "Partial Matching and Word Boundaries" above describes  an  issue  that
       arises  if  the  pattern ends with \b or \B. Another kind of difference
       may occur when there are multiple  matching  possibilities,  because  a
       partial match result is given only when there are no completed matches.
       This means that as soon as the shortest match has been found, continua-
       tion  to  a  new subject segment is no longer possible.  Consider again
       this pcretest example:

           re> /dog(sbody)?/
         data> dogsb\P
          0: dog
         data> do\P\D
         Partial match: do
         data> gsb\R\P\D
          0: g
         data> dogsbody\D
          0: dogsbody
          1: dog

       The first data line passes the string "dogsb" to  pcre_exec(),  setting
       the  PCRE_PARTIAL_SOFT  option.  Although the string is a partial match
       for "dogsbody", the  result  is  not  PCRE_ERROR_PARTIAL,  because  the
       shorter  string  "dog" is a complete match. Similarly, when the subject
       is presented to pcre_dfa_exec() in several parts ("do" and "gsb"  being
       the first two) the match stops when "dog" has been found, and it is not
       possible to continue. On the other hand, if "dogsbody" is presented  as
       a single string, pcre_dfa_exec() finds both matches.

       Because of these problems, it is probably best to use PCRE_PARTIAL_HARD
       when matching multi-segment data. The example above then  behaves  dif-
       ferently:

           re> /dog(sbody)?/
         data> dogsb\P\P
         Partial match: dogsb
         data> do\P\D
         Partial match: do
         data> gsb\R\P\P\D
         Partial match: gsb


       4. Patterns that contain alternatives at the top level which do not all
       start with the  same  pattern  item  may  not  work  as  expected  when
       PCRE_DFA_RESTART  is  used  with pcre_dfa_exec(). For example, consider
       this pattern:

         1234|3789

       If the first part of the subject is "ABC123", a partial  match  of  the
       first  alternative  is found at offset 3. There is no partial match for
       the second alternative, because such a match does not start at the same
       point  in  the  subject  string. Attempting to continue with the string
       "7890" does not yield a match  because  only  those  alternatives  that
       match  at  one  point in the subject are remembered. The problem arises
       because the start of the second alternative matches  within  the  first
       alternative.  There  is  no  problem with anchored patterns or patterns
       such as:

         1234|ABCD

       where no string can be a partial match for both alternatives.  This  is
       not  a  problem if pcre_exec() is used, because the entire match has to
       be rerun each time:

           re> /1234|3789/
         data> ABC123\P
         Partial match: 123
         data> 1237890
          0: 3789

       Of course, instead of using PCRE_DFA_RESTART, the same technique of re-
       running the entire match can also be used with pcre_dfa_exec(). Another
       possibility is to work with two buffers. If a partial match at offset n
       in  the first buffer is followed by "no match" when PCRE_DFA_RESTART is
       used on the second buffer, you can then try a  new  match  starting  at
       offset n+1 in the first buffer.


AUTHOR

       Philip Hazel
       University Computing Service
       Cambridge CB2 3QH, England.


REVISION

       Last updated: 19 October 2009
       Copyright (c) 1997-2009 University of Cambridge.
------------------------------------------------------------------------------


PCREPRECOMPILE(3)                                            PCREPRECOMPILE(3)


NAME
       PCRE - Perl-compatible regular expressions


SAVING AND RE-USING PRECOMPILED PCRE PATTERNS

       If  you  are running an application that uses a large number of regular
       expression patterns, it may be useful to store them  in  a  precompiled
       form  instead  of  having to compile them every time the application is
       run.  If you are not  using  any  private  character  tables  (see  the
       pcre_maketables()  documentation),  this is relatively straightforward.
       If you are using private tables, it is a little bit more complicated.

       If you save compiled patterns to a file, you can copy them to a differ-
       ent  host  and  run them there. This works even if the new host has the
       opposite endianness to the one on which  the  patterns  were  compiled.
       There  may  be a small performance penalty, but it should be insignifi-
       cant. However, compiling regular expressions with one version  of  PCRE
       for  use  with  a  different  version is not guaranteed to work and may
       cause crashes.


SAVING A COMPILED PATTERN
       The value returned by pcre_compile() points to a single block of memory
       that  holds  the compiled pattern and associated data. You can find the
       length of this block in bytes by calling pcre_fullinfo() with an  argu-
       ment  of  PCRE_INFO_SIZE. You can then save the data in any appropriate
       manner. Here is sample code that compiles a pattern and writes it to  a
       file. It assumes that the variable fd refers to a file that is open for
       output:

         int erroroffset, rc, size;
         char *error;
         pcre *re;

         re = pcre_compile("my pattern", 0, &error, &erroroffset, NULL);
         if (re == NULL) { ... handle errors ... }
         rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size);
         if (rc < 0) { ... handle errors ... }
         rc = fwrite(re, 1, size, fd);
         if (rc != size) { ... handle errors ... }

       In this example, the bytes  that  comprise  the  compiled  pattern  are
       copied  exactly.  Note that this is binary data that may contain any of
       the 256 possible byte  values.  On  systems  that  make  a  distinction
       between binary and non-binary data, be sure that the file is opened for
       binary output.

       If you want to write more than one pattern to a file, you will have  to
       devise  a  way of separating them. For binary data, preceding each pat-
       tern with its length is probably  the  most  straightforward  approach.
       Another  possibility is to write out the data in hexadecimal instead of
       binary, one pattern to a line.

       Saving compiled patterns in a file is only one possible way of  storing
       them  for later use. They could equally well be saved in a database, or
       in the memory of some daemon process that passes them  via  sockets  to
       the processes that want them.

       If  the pattern has been studied, it is also possible to save the study
       data in a similar way to the compiled  pattern  itself.  When  studying
       generates  additional  information, pcre_study() returns a pointer to a
       pcre_extra data block. Its format is defined in the section on matching
       a  pattern in the pcreapi documentation. The study_data field points to
       the binary study data,  and  this  is  what  you  must  save  (not  the
       pcre_extra  block itself). The length of the study data can be obtained
       by calling pcre_fullinfo() with  an  argument  of  PCRE_INFO_STUDYSIZE.
       Remember  to check that pcre_study() did return a non-NULL value before
       trying to save the study data.


RE-USING A PRECOMPILED PATTERN

       Re-using a precompiled pattern is straightforward. Having  reloaded  it
       into   main   memory,   you   pass   its   pointer  to  pcre_exec()  or
       pcre_dfa_exec() in the usual way. This  should  work  even  on  another
       host,  and  even  if  that  host has the opposite endianness to the one
       where the pattern was compiled.

       However, if you passed a pointer to custom character  tables  when  the
       pattern  was  compiled  (the  tableptr argument of pcre_compile()), you
       must now pass a similar  pointer  to  pcre_exec()  or  pcre_dfa_exec(),
       because  the  value  saved  with the compiled pattern will obviously be
       nonsense.  A field in a pcre_extra block is used to pass this data, as
       described  in the section on matching a pattern in the pcreapi documen-
       tation.

       If you did not provide custom character tables  when  the  pattern  was
       compiled,  the  pointer  in  the compiled pattern is NULL, which causes
       pcre_exec() to use PCRE's internal tables. Thus, you  do  not  need  to
       take any special action at run time in this case.

       If  you  saved study data with the compiled pattern, you need to create
       your own pcre_extra data block and set the study_data field to point to
       the  reloaded  study  data. You must also set the PCRE_EXTRA_STUDY_DATA
       bit in the flags field to indicate that study  data  is  present.  Then
       pass  the  pcre_extra  block  to  pcre_exec() or pcre_dfa_exec() in the
       usual way.


COMPATIBILITY WITH DIFFERENT PCRE RELEASES

       In general, it is safest to  recompile  all  saved  patterns  when  you
       update  to  a new PCRE release, though not all updates actually require
       this. Recompiling is definitely needed for release 7.2.


AUTHOR

       Philip Hazel
       University Computing Service
       Cambridge CB2 3QH, England.


REVISION

       Last updated: 13 June 2007
       Copyright (c) 1997-2007 University of Cambridge.
------------------------------------------------------------------------------


PCREPERFORM(3)                                                  PCREPERFORM(3)


NAME
       PCRE - Perl-compatible regular expressions


PCRE PERFORMANCE

       Two  aspects  of performance are discussed below: memory usage and pro-
       cessing time. The way you express your pattern as a regular  expression
       can affect both of them.


COMPILED PATTERN MEMORY USAGE

       Patterns are compiled by PCRE into a reasonably efficient byte code, so
       that most simple patterns do not use much memory. However, there is one
       case  where  the memory usage of a compiled pattern can be unexpectedly
       large. If a parenthesized subpattern has a quantifier  with  a  minimum
       greater  than  1  and/or  a  limited  maximum,  the whole subpattern is
       repeated in the compiled code. For example, the pattern

         (abc|def){2,4}

       is compiled as if it were

         (abc|def)(abc|def)((abc|def)(abc|def)?)?

       (Technical aside: It is done this way so that backtrack  points  within
       each of the repetitions can be independently maintained.)

       For  regular expressions whose quantifiers use only small numbers, this
       is not usually a problem. However, if the numbers are large,  and  par-
       ticularly  if  such repetitions are nested, the memory usage can become
       an embarrassment. For example, the very simple pattern

         ((ab){1,1000}c){1,3}

       uses 51K bytes when compiled. When PCRE is compiled  with  its  default
       internal  pointer  size of two bytes, the size limit on a compiled pat-
       tern is 64K, and this is reached with the above pattern  if  the  outer
       repetition is increased from 3 to 4. PCRE can be compiled to use larger
       internal pointers and thus handle larger compiled patterns, but  it  is
       better to try to rewrite your pattern to use less memory if you can.

       One  way  of reducing the memory usage for such patterns is to make use
       of PCRE's "subroutine" facility. Re-writing the above pattern as

         ((ab)(?2){0,999}c)(?1){0,2}

       reduces the memory requirements to 18K, and indeed it remains under 20K
       even  with the outer repetition increased to 100. However, this pattern
       is not exactly equivalent, because the "subroutine" calls  are  treated
       as  atomic groups into which there can be no backtracking if there is a
       subsequent matching failure. Therefore, PCRE cannot  do  this  kind  of
       rewriting  automatically.   Furthermore,  there is a noticeable loss of
       speed when executing the modified pattern. Nevertheless, if the  atomic
       grouping  is  not  a  problem and the loss of speed is acceptable, this
       kind of rewriting will allow you to process patterns that  PCRE  cannot
       otherwise handle.


STACK USAGE AT RUN TIME

       When  pcre_exec()  is  used  for matching, certain kinds of pattern can
       cause it to use large amounts of the process stack.  In  some  environ-
       ments  the default process stack is quite small, and if it runs out the
       result is often SIGSEGV.  This issue is probably  the  most  frequently
       raised  problem  with  PCRE. Rewriting your pattern can often help. The
       pcrestack documentation discusses this issue in detail.


PROCESSING TIME

       Certain items in regular expression patterns are processed  more  effi-
       ciently than others. It is more efficient to use a character class like
       [aeiou]  than  a  set  of   single-character   alternatives   such   as
       (a|e|i|o|u).  In  general,  the simplest construction that provides the
       required behaviour is usually the most efficient. Jeffrey Friedl's book
       contains  a  lot  of useful general discussion about optimizing regular
       expressions for efficient performance. This  document  contains  a  few
       observations about PCRE.

       Using  Unicode  character  properties  (the  \p, \P, and \X escapes) is
       slow, because PCRE has to scan a structure that contains data for  over
       fifteen  thousand  characters whenever it needs a character's property.
       If you can find an alternative pattern  that  does  not  use  character
       properties, it will probably be faster.

       When  a  pattern  begins  with .* not in parentheses, or in parentheses
       that are not the subject of a backreference, and the PCRE_DOTALL option
       is  set, the pattern is implicitly anchored by PCRE, since it can match
       only at the start of a subject string. However, if PCRE_DOTALL  is  not
       set,  PCRE  cannot  make this optimization, because the . metacharacter
       does not then match a newline, and if the subject string contains  new-
       lines,  the  pattern may match from the character immediately following
       one of them instead of from the very start. For example, the pattern

         .*second

       matches the subject "first\nand second" (where \n stands for a  newline
       character),  with the match starting at the seventh character. In order
       to do this, PCRE has to retry the match starting after every newline in
       the subject.

       If  you  are using such a pattern with subject strings that do not con-
       tain newlines, the best performance is obtained by setting PCRE_DOTALL,
       or  starting  the pattern with ^.* or ^.*? to indicate explicit anchor-
       ing. That saves PCRE from having to scan along the subject looking  for
       a newline to restart at.

       Beware  of  patterns  that contain nested indefinite repeats. These can
       take a long time to run when applied to a string that does  not  match.
       Consider the pattern fragment

         ^(a+)*

       This  can  match "aaaa" in 16 different ways, and this number increases
       very rapidly as the string gets longer. (The * repeat can match  0,  1,
       2,  3, or 4 times, and for each of those cases other than 0 or 4, the +
       repeats can match different numbers of times.) When  the  remainder  of
       the pattern is such that the entire match is going to fail, PCRE has in
       principle to try  every  possible  variation,  and  this  can  take  an
       extremely long time, even for relatively short strings.

       An optimization catches some of the more simple cases such as

         (a+)*b

       where  a  literal  character  follows. Before embarking on the standard
       matching procedure, PCRE checks that there is a "b" later in  the  sub-
       ject  string, and if there is not, it fails the match immediately. How-
       ever, when there is no following literal this  optimization  cannot  be
       used. You can see the difference by comparing the behaviour of

         (a+)*\d

       with  the  pattern  above.  The former gives a failure almost instantly
       when applied to a whole line of  "a"  characters,  whereas  the  latter
       takes an appreciable time with strings longer than about 20 characters.

       In many cases, the solution to this kind of performance issue is to use
       an atomic group or a possessive quantifier.


AUTHOR

       Philip Hazel
       University Computing Service
       Cambridge CB2 3QH, England.


REVISION

       Last updated: 07 March 2010
       Copyright (c) 1997-2010 University of Cambridge.
------------------------------------------------------------------------------


PCREPOSIX(3)                                                      PCREPOSIX(3)


NAME
       PCRE - Perl-compatible regular expressions.


SYNOPSIS OF POSIX API

       #include <pcreposix.h>

       int regcomp(regex_t *preg, const char *pattern,
            int cflags);

       int regexec(regex_t *preg, const char *string,
            size_t nmatch, regmatch_t pmatch[], int eflags);

       size_t regerror(int errcode, const regex_t *preg,
            char *errbuf, size_t errbuf_size);

       void regfree(regex_t *preg);


DESCRIPTION

       This  set  of  functions provides a POSIX-style API to the PCRE regular
       expression package. See the pcreapi documentation for a description  of
       PCRE's native API, which contains much additional functionality.

       The functions described here are just wrapper functions that ultimately
       call  the  PCRE  native  API.  Their  prototypes  are  defined  in  the
       pcreposix.h  header  file,  and  on  Unix systems the library itself is
       called pcreposix.a, so can be accessed by  adding  -lpcreposix  to  the
       command  for  linking  an application that uses them. Because the POSIX
       functions call the native ones, it is also necessary to add -lpcre.

       I have implemented only those POSIX option bits that can be  reasonably
       mapped  to PCRE native options. In addition, the option REG_EXTENDED is
       defined with the value zero. This has no  effect,  but  since  programs
       that  are  written  to  the POSIX interface often use it, this makes it
       easier to slot in PCRE as a replacement library.  Other  POSIX  options
       are not even defined.

       There  are also some other options that are not defined by POSIX. These
       have been added at the request of users who want to make use of certain
       PCRE-specific features via the POSIX calling interface.

       When  PCRE  is  called  via these functions, it is only the API that is
       POSIX-like in style. The syntax and semantics of  the  regular  expres-
       sions  themselves  are  still  those of Perl, subject to the setting of
       various PCRE options, as described below. "POSIX-like in  style"  means
       that  the  API  approximates  to  the POSIX definition; it is not fully
       POSIX-compatible, and in multi-byte encoding  domains  it  is  probably
       even less compatible.

       The  header for these functions is supplied as pcreposix.h to avoid any
       potential clash with other POSIX  libraries.  It  can,  of  course,  be
       renamed or aliased as regex.h, which is the "correct" name. It provides
       two structure types, regex_t for  compiled  internal  forms,  and  reg-
       match_t  for  returning  captured substrings. It also defines some con-
       stants whose names start  with  "REG_";  these  are  used  for  setting
       options and identifying error codes.


COMPILING A PATTERN

       The  function regcomp() is called to compile a pattern into an internal
       form. The pattern is a C string terminated by a  binary  zero,  and  is
       passed  in  the  argument  pattern. The preg argument is a pointer to a
       regex_t structure that is used as a base for storing information  about
       the compiled regular expression.

       The argument cflags is either zero, or contains one or more of the bits
       defined by the following macros:

         REG_DOTALL

       The PCRE_DOTALL option is set when the regular expression is passed for
       compilation to the native function. Note that REG_DOTALL is not part of
       the POSIX standard.

         REG_ICASE

       The PCRE_CASELESS option is set when the regular expression  is  passed
       for compilation to the native function.

         REG_NEWLINE

       The  PCRE_MULTILINE option is set when the regular expression is passed
       for compilation to the native function. Note that this does  not  mimic
       the  defined  POSIX  behaviour  for REG_NEWLINE (see the following sec-
       tion).

         REG_NOSUB

       The PCRE_NO_AUTO_CAPTURE option is set when the regular  expression  is
       passed for compilation to the native function. In addition, when a pat-
       tern that is compiled with this flag is passed to regexec() for  match-
       ing,  the  nmatch  and  pmatch  arguments  are ignored, and no captured
       strings are returned.

         REG_UNGREEDY

       The PCRE_UNGREEDY option is set when the regular expression  is  passed
       for  compilation  to the native function. Note that REG_UNGREEDY is not
       part of the POSIX standard.

         REG_UTF8

       The PCRE_UTF8 option is set when the regular expression is  passed  for
       compilation  to the native function. This causes the pattern itself and
       all data strings used for matching it to be treated as  UTF-8  strings.
       Note that REG_UTF8 is not part of the POSIX standard.

       In  the  absence  of  these  flags, no options are passed to the native
       function.  This means that the regex  is  compiled  with  PCRE  default
       semantics.  In particular, the way it handles newline characters in the
       subject string is the Perl way, not the POSIX way.  Note  that  setting
       PCRE_MULTILINE  has only some of the effects specified for REG_NEWLINE.
       It does not affect the way newlines are matched by . (they are not)  or
       by a negative class such as [^a] (they are).

       The  yield of regcomp() is zero on success, and non-zero otherwise. The
       preg structure is filled in on success, and one member of the structure
       is  public: re_nsub contains the number of capturing subpatterns in the
       regular expression. Various error codes are defined in the header file.

       NOTE: If the yield of regcomp() is non-zero, you must  not  attempt  to
       use the contents of the preg structure. If, for example, you pass it to
       regexec(), the result is undefined and your program is likely to crash.


MATCHING NEWLINE CHARACTERS

       This area is not simple, because POSIX and Perl take different views of
       things.   It  is  not possible to get PCRE to obey POSIX semantics, but
       then PCRE was never intended to be a POSIX engine. The following  table
       lists  the  different  possibilities for matching newline characters in
       PCRE:

                                 Default   Change with

         . matches newline          no     PCRE_DOTALL
         newline matches [^a]       yes    not changeable
         $ matches \n at end        yes    PCRE_DOLLAR_ENDONLY
         $ matches \n in middle     no     PCRE_MULTILINE
         ^ matches \n in middle     no     PCRE_MULTILINE

       This is the equivalent table for POSIX:

                                 Default   Change with

         . matches newline          yes    REG_NEWLINE
         newline matches [^a]       yes    REG_NEWLINE
         $ matches \n at end        no     REG_NEWLINE
         $ matches \n in middle     no     REG_NEWLINE
         ^ matches \n in middle     no     REG_NEWLINE

       PCRE's behaviour is the same as Perl's, except that there is no equiva-
       lent  for  PCRE_DOLLAR_ENDONLY in Perl. In both PCRE and Perl, there is
       no way to stop newline from matching [^a].

       The  default  POSIX  newline  handling  can  be  obtained  by   setting
       PCRE_DOTALL  and  PCRE_DOLLAR_ENDONLY, but there is no way to make PCRE
       behave exactly as for the REG_NEWLINE action.


MATCHING A PATTERN

       The function regexec() is called  to  match  a  compiled  pattern  preg
       against  a  given string, which is by default terminated by a zero byte
       (but see REG_STARTEND below), subject to the options in  eflags.  These
       can be:

         REG_NOTBOL

       The PCRE_NOTBOL option is set when calling the underlying PCRE matching
       function.

         REG_NOTEMPTY

       The PCRE_NOTEMPTY option is set when calling the underlying PCRE match-
       ing function. Note that REG_NOTEMPTY is not part of the POSIX standard.
       However, setting this option can give more POSIX-like behaviour in some
       situations.

         REG_NOTEOL

       The PCRE_NOTEOL option is set when calling the underlying PCRE matching
       function.

         REG_STARTEND

       The string is considered to start at string +  pmatch[0].rm_so  and  to
       have  a terminating NUL located at string + pmatch[0].rm_eo (there need
       not actually be a NUL at that location), regardless  of  the  value  of
       nmatch.  This  is a BSD extension, compatible with but not specified by
       IEEE Standard 1003.2 (POSIX.2), and should  be  used  with  caution  in
       software intended to be portable to other systems. Note that a non-zero
       rm_so does not imply REG_NOTBOL; REG_STARTEND affects only the location
       of the string, not how it is matched.

       If  the pattern was compiled with the REG_NOSUB flag, no data about any
       matched strings  is  returned.  The  nmatch  and  pmatch  arguments  of
       regexec() are ignored.

       If the value of nmatch is zero, or if the value of pmatch is NULL, no data
       about any matched strings is returned.

       Otherwise, the portion of the string that was matched, and also any cap-
       tured substrings, are returned via the pmatch argument, which points to
       an array of nmatch structures of type regmatch_t, containing  the  mem-
       bers  rm_so  and rm_eo. These contain the offset to the first character
       of each substring and the offset to the first character after  the  end
       of  each substring, respectively. The 0th element of the vector relates
       to the entire portion of string that was matched;  subsequent  elements
       relate  to  the capturing subpatterns of the regular expression. Unused
       entries in the array have both structure members set to -1.

       A successful match yields  a  zero  return;  various  error  codes  are
       defined  in  the  header  file,  of which REG_NOMATCH is the "expected"
       failure code.


ERROR MESSAGES

       The regerror() function maps a non-zero errorcode from either regcomp()
       or  regexec()  to  a  printable message. If preg is not NULL, the error
       should have arisen from the use of that structure. A message terminated
       by  a  binary  zero  is  placed  in  errbuf. The length of the message,
       including the zero, is limited to errbuf_size. The yield of  the  func-
       tion is the size of buffer needed to hold the whole message.


MEMORY USAGE

       Compiling  a regular expression causes memory to be allocated and asso-
       ciated with the preg structure. The function regfree() frees  all  such
       memory,  after  which  preg may no longer be used as a compiled expres-
       sion.


AUTHOR

       Philip Hazel
       University Computing Service
       Cambridge CB2 3QH, England.


REVISION

       Last updated: 02 September 2009
       Copyright (c) 1997-2009 University of Cambridge.
------------------------------------------------------------------------------


PCRECPP(3)                                                          PCRECPP(3)


NAME
       PCRE - Perl-compatible regular expressions.


SYNOPSIS OF C++ WRAPPER

       #include <pcrecpp.h>


DESCRIPTION

       The  C++  wrapper  for PCRE was provided by Google Inc. Some additional
       functionality was added by Giuseppe Maxia. This brief man page was con-
       structed  from  the  notes  in the pcrecpp.h file, which should be con-
       sulted for further details.


MATCHING INTERFACE

       The "FullMatch" operation checks that supplied text matches a  supplied
       pattern  exactly.  If pointer arguments are supplied, it copies matched
       sub-strings that match sub-patterns into them.

         Example: successful match
            pcrecpp::RE re("h.*o");
            re.FullMatch("hello");

         Example: unsuccessful match (requires full match):
            pcrecpp::RE re("e");
            !re.FullMatch("hello");

         Example: creating a temporary RE object:
            pcrecpp::RE("h.*o").FullMatch("hello");

       You can pass in a "const char*" or a "string" for "text". The  examples
       below  tend to use a const char*. You can, as in the different examples
       above, store the RE object explicitly in a variable or use a  temporary
       RE  object.  The  examples below use one mode or the other arbitrarily.
       Either could correctly be used for any of these examples.

       You must supply extra pointer arguments to extract matched subpieces.

         Example: extracts "ruby" into "s" and 1234 into "i"
            int i;
            string s;
            pcrecpp::RE re("(\\w+):(\\d+)");
            re.FullMatch("ruby:1234", &s, &i);

         Example: does not try to extract any extra sub-patterns
            re.FullMatch("ruby:1234", &s);

         Example: does not try to extract into NULL
            re.FullMatch("ruby:1234", NULL, &i);

         Example: integer overflow causes failure
            !re.FullMatch("ruby:1234567891234", NULL, &i);

         Example: fails because there aren't enough sub-patterns:
            !pcrecpp::RE("\\w+:\\d+").FullMatch("ruby:1234", &s);

         Example: fails because string cannot be stored in integer
            !pcrecpp::RE("(.*)").FullMatch("ruby", &i);

       The provided pointer arguments can be pointers to  any  scalar  numeric
       type, or one of:

          string        (matched piece is copied to string)
          StringPiece   (StringPiece is mutated to point to matched piece)
          T             (where "bool T::ParseFrom(const char*, int)" exists)
          NULL          (the corresponding matched sub-pattern is not copied)

       The  function returns true iff all of the following conditions are sat-
       isfied:

         a. "text" matches "pattern" exactly;

         b. The number of matched sub-patterns is >= number of supplied
            pointers;

         c. The "i"th argument has a suitable type for holding the
            string captured as the "i"th sub-pattern. If you pass in
            void * NULL for the "i"th argument, or a non-void * NULL
            of the correct type, or pass fewer arguments than the
            number of sub-patterns, the "i"th captured sub-pattern is
            ignored.

       CAVEAT: An optional sub-pattern that does  not  exist  in  the  matched
       string  is  assigned  the  empty  string. Therefore, the following will
       return false (because the empty string is not a valid number):

          int number;
          pcrecpp::RE::FullMatch("abc", "[a-z]+(\\d+)?", &number);

       The matching interface supports at most 16 arguments per call.  If  you
       need    more,    consider    using    the    more   general   interface
       pcrecpp::RE::DoMatch. See pcrecpp.h for the signature for DoMatch.

       NOTE: Do not use no_arg, which is used internally to mark the end of  a
       list  of optional arguments, as a placeholder for missing arguments, as
       this can lead to segfaults.


QUOTING METACHARACTERS

       You can use the "QuoteMeta" operation to insert backslashes before  all
       potentially  meaningful  characters  in  a string. The returned string,
       used as a regular expression, will exactly match the original string.

         Example:
            string quoted = RE::QuoteMeta(unquoted);

       Note that it's legal to escape a character even if it  has  no  special
       meaning  in  a  regular expression -- so this function does that. (This
       also makes it identical to the perl function  of  the  same  name;  see
       "perldoc    -f    quotemeta".)    For   example,   "1.5-2.0?"   becomes
       "1\.5\-2\.0\?".


PARTIAL MATCHES

       You can use the "PartialMatch" operation when you want the  pattern  to
       match any substring of the text.

         Example: simple search for a string:
            pcrecpp::RE("ell").PartialMatch("hello");

         Example: find first number in a string:
            int number;
            pcrecpp::RE re("(\\d+)");
            re.PartialMatch("x*100 + 20", &number);
            assert(number == 100);


UTF-8 AND THE MATCHING INTERFACE

       By  default,  pattern  and text are plain text, one byte per character.
       The UTF8 flag, passed to  the  constructor,  causes  both  pattern  and
       string to be treated as UTF-8 text, still a byte stream but potentially
       multiple bytes per character. In practice, the text is likelier  to  be
       UTF-8  than  the pattern, but the match returned may depend on the UTF8
       flag, so always use it when matching UTF8 text. For example,  "."  will
       match  one  byte normally but with UTF8 set may match up to three bytes
       of a multi-byte character.

         Example:
            pcrecpp::RE_Options options;
            options.set_utf8();
            pcrecpp::RE re(utf8_pattern, options);
            re.FullMatch(utf8_string);

         Example: using the convenience function UTF8():
            pcrecpp::RE re(utf8_pattern, pcrecpp::UTF8());
            re.FullMatch(utf8_string);

       NOTE: The UTF8 flag is ignored if pcre was not configured with the
             --enable-utf8 flag.


PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE

       PCRE defines some modifiers to  change  the  behavior  of  the  regular
       expression   engine.  The  C++  wrapper  defines  an  auxiliary  class,
       RE_Options, as a vehicle to pass such modifiers to  a  RE  class.  Cur-
       rently, the following modifiers are supported:

          modifier              description               Perl corresponding

          PCRE_CASELESS         case insensitive match      /i
          PCRE_MULTILINE        multiple lines match        /m
          PCRE_DOTALL           dot matches newlines        /s
          PCRE_DOLLAR_ENDONLY   $ matches only at end       N/A
          PCRE_EXTRA            strict escape parsing       N/A
          PCRE_EXTENDED         ignore whitespaces          /x
          PCRE_UTF8             handles UTF8 chars          built-in
          PCRE_UNGREEDY         reverses * and *?           N/A
          PCRE_NO_AUTO_CAPTURE  disables capturing parens   N/A (*)

       (*)  Both Perl and PCRE allow non-capturing parentheses by means of the
       "?:" modifier within the pattern itself. e.g. (?:ab|cd) does  not  cap-
       ture, while (ab|cd) does.

       For  a  full  account on how each modifier works, please check the PCRE
       API reference page.

       For each modifier, there are two member functions whose  name  is  made
       out  of  the  modifier  in  lowercase,  without the "PCRE_" prefix. For
       instance, PCRE_CASELESS is handled by

         bool caseless()

       which returns true if the modifier is set, and

         RE_Options & set_caseless(bool)

       which sets or unsets the modifier. Moreover, PCRE_EXTRA_MATCH_LIMIT can
       be  accessed  through  the  set_match_limit()  and match_limit() member
       functions. Setting match_limit to a non-zero value will limit the  exe-
       cution  of pcre to keep it from doing bad things like blowing the stack
       or taking an eternity to return a result.  A  value  of  5000  is  good
       enough  to stop stack blowup in a 2MB thread stack. Setting match_limit
       to  zero  disables  match  limiting.  Alternatively,   you   can   call
       match_limit_recursion()  which uses PCRE_EXTRA_MATCH_LIMIT_RECURSION to
       limit how much  PCRE  recurses.  match_limit()  limits  the  number  of
       matches PCRE does; match_limit_recursion() limits the depth of internal
       recursion, and therefore the amount of stack that is used.

       Normally, to pass one or more modifiers to a RE class,  you  declare  a
       RE_Options object, set the appropriate options, and pass this object to
       a RE constructor. Example:

          RE_Options opt;
          opt.set_caseless(true);
          if (RE("HELLO", opt).PartialMatch("hello world")) ...

       RE_Options has two constructors. The default constructor takes no argu-
       ments  and creates a set of flags that are off by default. The optional
       parameter option_flags is to facilitate transfer of legacy code from  C
       programs.  This lets you do

          RE(pattern,
            RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);

       However, new code is better off doing

          RE(pattern,
            RE_Options().set_caseless(true).set_multiline(true))
              .PartialMatch(str);

       If you are going to pass one of the most used modifiers, there are some
       convenience functions that return a RE_Options class with the appropri-
       ate  modifier  already  set: CASELESS(), UTF8(), MULTILINE(), DOTALL(),
       and EXTENDED().

       If you need to set several options at once, and you don't  want  to  go
       through  the pains of declaring a RE_Options object and setting several
       options, there is a parallel method that gives you such ability on  the
       fly.  You  can  concatenate several set_xxxxx() member functions, since
       each of them returns a reference to its class object. For  example,  to
       pass  PCRE_CASELESS, PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one
       statement, you may write:

          RE(" ^ xyz \\s+ .* blah$",
            RE_Options()
              .set_caseless(true)
              .set_extended(true)
              .set_multiline(true)).PartialMatch(sometext);


SCANNING TEXT INCREMENTALLY

       The "Consume" operation may be useful if you want to  repeatedly  match
       regular expressions at the front of a string and skip over them as they
       match. This requires use of the "StringPiece" type, which represents  a
       sub-range  of  a  real  string.  Like RE, StringPiece is defined in the
       pcrecpp namespace.

         Example: read lines of the form "var = value" from a string.
            string contents = ...;                 // Fill string somehow
            pcrecpp::StringPiece input(contents);  // Wrap in a StringPiece

            string var;
            int value;
            pcrecpp::RE re("(\\w+) = (\\d+)\n");
            while (re.Consume(&input, &var, &value)) {
              ...;
            }

       Each successful call  to  "Consume"  will  set  "var/value",  and  also
       advance "input" so it points past the matched text.

       The  "FindAndConsume"  operation  is  similar to "Consume" but does not
       anchor your match at the beginning of  the  string.  For  example,  you
       could extract all words from a string by repeatedly calling

         pcrecpp::RE("(\\w+)").FindAndConsume(&input, &word)


PARSING HEX/OCTAL/C-RADIX NUMBERS

       By default, if you pass a pointer to a numeric value, the corresponding
       text is interpreted as a base-10  number.  You  can  instead  wrap  the
       pointer with a call to one of the operators Hex(), Octal(), or CRadix()
       to interpret the text in another base. The CRadix  operator  interprets
       C-style  "0"  (base-8)  and  "0x"  (base-16)  prefixes, but defaults to
       base-10.

         Example:
           int a, b, c, d;
           pcrecpp::RE re("(.*) (.*) (.*) (.*)");
           re.FullMatch("100 40 0100 0x40",
                        pcrecpp::Octal(&a), pcrecpp::Hex(&b),
                        pcrecpp::CRadix(&c), pcrecpp::CRadix(&d));

       will leave 64 in a, b, c, and d.


REPLACING PARTS OF STRINGS

       You can replace the first match of "pattern" in "str"  with  "rewrite".
       Within  "rewrite",  backslash-escaped  digits (\1 to \9) can be used to
       insert text matching corresponding parenthesized group  from  the  pat-
       tern. \0 in "rewrite" refers to the entire matching text. For example:

         string s = "yabba dabba doo";
         pcrecpp::RE("b+").Replace("d", &s);

       will  leave  "s" containing "yada dabba doo". The result is true if the
       pattern matches and a replacement occurs, false otherwise.

       GlobalReplace is like Replace except that it replaces  all  occurrences
       of  the  pattern  in  the string with the rewrite. Replacements are not
       subject to re-matching. For example:

         string s = "yabba dabba doo";
         pcrecpp::RE("b+").GlobalReplace("d", &s);

       will leave "s" containing "yada dada doo". It  returns  the  number  of
       replacements made.

       Extract  is like Replace, except that if the pattern matches, "rewrite"
       is copied into "out" (an additional argument) with substitutions.   The
       non-matching  portions  of "text" are ignored. Returns true iff a match
       occurred and the extraction happened successfully;  if no match occurs,
       the string is left unaffected.


AUTHOR

       The C++ wrapper was contributed by Google Inc.
       Copyright (c) 2007 Google Inc.


REVISION

       Last updated: 17 March 2009
------------------------------------------------------------------------------


PCRESAMPLE(3)                                                    PCRESAMPLE(3)


NAME
       PCRE - Perl-compatible regular expressions


PCRE SAMPLE PROGRAM

       A simple, complete demonstration program, to get you started with using
       PCRE, is supplied in the file pcredemo.c in the  PCRE  distribution.  A
       listing  of this program is given in the pcredemo documentation. If you
       do not have a copy of the PCRE distribution, you can save this  listing
       to re-create pcredemo.c.

       The program compiles the regular expression that is its first argument,
       and matches it against the subject string in its  second  argument.  No
       PCRE  options are set, and default character tables are used. If match-
       ing succeeds, the program outputs  the  portion  of  the  subject  that
       matched, together with the contents of any captured substrings.

       If the -g option is given on the command line, the program then goes on
       to check for further matches of the same regular expression in the same
       subject  string. The logic is a little bit tricky because of the possi-
       bility of matching an empty string. Comments in the code  explain  what
       is going on.

       If  PCRE  is  installed in the standard include and library directories
       for your operating system, you should be able to compile the demonstra-
       tion program using this command:

         gcc -o pcredemo pcredemo.c -lpcre

       If  PCRE is installed elsewhere, you may need to add additional options
       to the command line. For example, on a Unix-like system that  has  PCRE
       installed  in  /usr/local,  you  can  compile the demonstration program
       using a command like this:

         gcc -o pcredemo -I/usr/local/include pcredemo.c \
             -L/usr/local/lib -lpcre

       Once you have compiled the demonstration program, you  can  run  simple
       tests like this:

         ./pcredemo 'cat|dog' 'the cat sat on the mat'
         ./pcredemo -g 'cat|dog' 'the dog sat on the cat'

       Note  that  there  is  a  much  more comprehensive test program, called
       pcretest, which supports  many  more  facilities  for  testing  regular
       expressions and the PCRE library. The pcredemo program is provided as a
       simple coding example.

       When you try to run pcredemo when PCRE is not installed in the standard
       library  directory,  you  may  get an error like this on some operating
       systems (e.g. Solaris):

         ld.so.1: a.out: fatal: libpcre.so.0: open failed:  No  such  file  or
       directory

       This  is  caused  by the way shared library support works on those sys-
       tems. You need to add

         -R/usr/local/lib

       (for example) to the compile command to get round this problem.


AUTHOR

       Philip Hazel
       University Computing Service
       Cambridge CB2 3QH, England.


REVISION

       Last updated: 30 September 2009
       Copyright (c) 1997-2009 University of Cambridge.
------------------------------------------------------------------------------
PCRESTACK(3)                                                      PCRESTACK(3)


NAME
       PCRE - Perl-compatible regular expressions


PCRE DISCUSSION OF STACK USAGE

       When  you call pcre_exec(), it makes use of an internal function called
       match(). This calls itself recursively at branch points in the pattern,
       in  order to remember the state of the match so that it can back up and
       try a different alternative if the first one fails.  As  matching  pro-
       ceeds  deeper  and deeper into the tree of possibilities, the recursion
       depth increases.

       Not all calls of match() increase the recursion depth; for an item such
       as  a* it may be called several times at the same level, after matching
       different numbers of a's. Furthermore, in a number of cases  where  the
       result  of  the  recursive call would immediately be passed back as the
       result of the current call (a "tail recursion"), the function  is  just
       restarted instead.

       The pcre_dfa_exec() function operates in an entirely different way, and
       uses recursion only when there is a  regular  expression  recursion  or
       subroutine  call in the pattern. This includes the processing of asser-
       tion and "once-only" subpatterns, which  are  handled  like  subroutine
       calls.  Normally,  these are never very deep, and the limit on the com-
       plexity of pcre_dfa_exec() is controlled by the amount of workspace  it
       is  given. However, it is possible to write patterns with runaway infi-
       nite recursions; such patterns will cause pcre_dfa_exec() to run out of
       stack. At present, there is no protection against this.

       The comments that follow do NOT apply to pcre_dfa_exec(); they are rel-
       evant only for pcre_exec().

   Reducing pcre_exec()'s stack usage

       Each time that match() is actually called recursively, it  uses  memory
       from  the  process  stack.  For certain kinds of pattern and data, very
       large amounts of stack may be needed, despite the recognition of  "tail
       recursion".   You  can often reduce the amount of recursion, and there-
       fore the amount of stack used, by modifying the pattern that  is  being
       matched. Consider, for example, this pattern:

         ([^<]|<(?!inet))+

       It  matches  from wherever it starts until it encounters "<inet" or the
       end of the data, and is the kind of pattern that  might  be  used  when
       processing an XML file. Each iteration of the outer parentheses matches
       either one character that is not "<" or a "<" that is not  followed  by
       "inet".  However,  each  time  a  parenthesis is processed, a recursion
       occurs, so this formulation uses a stack frame for each matched charac-
       ter.  For  a long string, a lot of stack is required. Consider now this
       rewritten pattern, which matches exactly the same strings:

         ([^<]++|<(?!inet))+

       This uses very much less stack, because runs of characters that do  not
       contain  "<" are "swallowed" in one item inside the parentheses. Recur-
       sion happens only when a "<" character that is not followed  by  "inet"
       is  encountered  (and  we assume this is relatively rare). A possessive
       quantifier is used to stop any backtracking into the  runs  of  non-"<"
       characters, but that is not related to stack usage.

       This  example shows that one way of avoiding stack problems when match-
       ing long subject strings is to write repeated parenthesized subpatterns
       to match more than one character whenever possible.

   Compiling PCRE to use heap instead of stack for pcre_exec()

       In  environments  where  stack memory is constrained, you might want to
       compile PCRE to use heap memory instead of stack for remembering  back-
       up  points  when  pcre_exec()  is running. This makes it run a lot more
       slowly, however.  Details of how to do this are given in the  pcrebuild
       documentation. When built in this way, instead of using the stack, PCRE
       obtains and frees memory by calling the functions that are  pointed  to
       by  the  pcre_stack_malloc  and  pcre_stack_free variables. By default,
       these point to malloc() and free(), but you can replace the pointers to
       cause  PCRE to use your own functions. Since the block sizes are always
       the same, and are always freed in reverse order, it may be possible  to
       implement  customized  memory handlers that are more efficient than the
       standard functions.

   Limiting pcre_exec()'s stack usage

       You can set limits on the number of times that match() is called,  both
       in  total  and recursively. If a limit is exceeded, pcre_exec() returns
       an error code. Setting suitable limits should prevent it  from  running
       out  of  stack.  The  default  values of the limits are very large, and
       unlikely ever to operate. They can be changed when PCRE is  built,  and
       they  can  also be set when pcre_exec() is called. For details of these
       interfaces, see the pcrebuild documentation and the  section  on  extra
       data for pcre_exec() in the pcreapi documentation.

       As a very rough rule of thumb, you should reckon on about 500 bytes per
       recursion. Thus, if you want to limit your  stack  usage  to  8Mb,  you
       should  set  the  limit at 16000 recursions. A 64Mb stack, on the other
       hand, can support around 128000 recursions.

       In Unix-like environments, the pcretest test program has a command line
       option (-S) that can be used to increase the size of its stack. As long
       as the stack is large enough, another option (-M) can be used  to  find
       the  smallest  limits  that allow a particular pattern to match a given
       subject string. This is done by  calling  pcre_exec()  repeatedly  with
       different limits.

   Changing stack size in Unix-like systems

       In  Unix-like environments, there is not often a problem with the stack
       unless very long strings are involved,  though  the  default  limit  on
       stack  size  varies  from system to system. Values from 8Mb to 64Mb are
       common. You can find your default limit by running the command:

         ulimit -s

       Unfortunately, the effect of running out of  stack  is  often  SIGSEGV,
       though  sometimes  a more explicit error message is given. You can nor-
       mally increase the limit on stack size by code such as this:

         struct rlimit rlim;
         getrlimit(RLIMIT_STACK, &rlim);
         rlim.rlim_cur = 100*1024*1024;
         setrlimit(RLIMIT_STACK, &rlim);

       This reads the current limits (soft and hard) using  getrlimit(),  then
       attempts  to  increase  the  soft limit to 100Mb using setrlimit(). You
       must do this before calling pcre_exec().

   Changing stack size in Mac OS X

       Using setrlimit(), as described above, should also work on Mac OS X. It
       is also possible to set a stack size when linking a program. There is a
       discussion  about  stack  sizes  in  Mac  OS  X  at  this   web   site:
       http://developer.apple.com/qa/qa2005/qa1419.html.


AUTHOR

       Philip Hazel
       University Computing Service
       Cambridge CB2 3QH, England.


REVISION

       Last updated: 03 January 2010
       Copyright (c) 1997-2010 University of Cambridge.
------------------------------------------------------------------------------


usr/share/doc/alt-pcre802-devel/pcretest.txt000064400000075365150403561530014676 0ustar00PCRETEST(1)                                                        PCRETEST(1)


NAME
       pcretest - a program for testing Perl-compatible regular expressions.


SYNOPSIS

       pcretest [options] [source] [destination]

       pcretest  was written as a test program for the PCRE regular expression
       library itself, but it can also be used for experimenting with  regular
       expressions.  This document describes the features of the test program;
       for details of the regular expressions themselves, see the  pcrepattern
       documentation. For details of the PCRE library function calls and their
       options, see the pcreapi documentation.


OPTIONS

       -b        Behave as if each regex has the /B (show bytecode)  modifier;
                 the internal form is output after compilation.

       -C        Output the version number of the PCRE library, and all avail-
                 able  information  about  the  optional  features  that   are
                 included, and then exit.

       -d        Behave  as  if  each  regex  has the /D (debug) modifier; the
                 internal form and information about the compiled  pattern  is
                 output after compilation; -d is equivalent to -b -i.

       -dfa      Behave  as if each data line contains the \D escape sequence;
                 this    causes    the    alternative    matching    function,
                 pcre_dfa_exec(),   to   be   used  instead  of  the  standard
                 pcre_exec() function (more detail is given below).

       -help     Output a brief summary of these options and then exit.

       -i        Behave as if each regex  has  the  /I  modifier;  information
                 about the compiled pattern is given after compilation.

       -M        Behave  as if each data line contains the \M escape sequence;
                 this causes PCRE to  discover  the  minimum  MATCH_LIMIT  and
                 MATCH_LIMIT_RECURSION settings by calling pcre_exec() repeat-
                 edly with different limits.

       -m        Output the size of each compiled pattern after  it  has  been
                 compiled.  This  is  equivalent  to adding /M to each regular
                 expression.  For  compatibility  with  earlier  versions   of
                 pcretest, -s is a synonym for -m.

       -o osize  Set  the number of elements in the output vector that is used
                 when calling pcre_exec() or pcre_dfa_exec() to be osize.  The
                 default  value is 45, which is enough for 14 capturing subex-
                 pressions  for  pcre_exec()  or  22  different  matches   for
                 pcre_dfa_exec().  The vector size can be changed for individ-
                 ual matching calls by including \O  in  the  data  line  (see
                 below).

       -p        Behave  as if each regex has the /P modifier; the POSIX wrap-
                 per API is used to call PCRE. None of the other  options  has
                 any effect when -p is set.

       -q        Do  not output the version number of pcretest at the start of
                 execution.

       -S size   On Unix-like systems, set the size of the  runtime  stack  to
                 size megabytes.

       -t        Run  each  compile, study, and match many times with a timer,
                 and output resulting time per compile or match (in  millisec-
                 onds).  Do  not set -m with -t, because you will then get the
                 size output a zillion times, and  the  timing  will  be  dis-
                 torted.  You  can  control  the number of iterations that are
                 used for timing by following -t with a number (as a  separate
                 item on the command line). For example, "-t 1000" would iter-
                 ate 1000 times. The default is to iterate 500000 times.

       -tm       This is like -t except that it times only the matching phase,
                 not the compile or study phases.


DESCRIPTION

       If  pcretest  is  given two filename arguments, it reads from the first
       and writes to the second. If it is given only one filename argument, it
       reads  from  that  file  and writes to stdout. Otherwise, it reads from
       stdin and writes to stdout, and prompts for each line of  input,  using
       "re>" to prompt for regular expressions, and "data>" to prompt for data
       lines.

       When pcretest is built, a configuration  option  can  specify  that  it
       should  be  linked  with the libreadline library. When this is done, if
       the input is from a terminal, it is read using the readline() function.
       This  provides line-editing and history facilities. The output from the
       -help option states whether or not readline() will be used.

       The program handles any number of sets of input on a single input file.
       Each  set starts with a regular expression, and continues with any num-
       ber of data lines to be matched against the pattern.

       Each data line is matched separately and independently. If you want  to
       do multi-line matches, you have to use the \n escape sequence (or \r or
       \r\n, etc., depending on the newline setting) in a single line of input
       to  encode  the  newline  sequences. There is no limit on the length of
       data lines; the input buffer is automatically extended  if  it  is  too
       small.

       An  empty  line signals the end of the data lines, at which point a new
       regular expression is read. The regular expressions are given  enclosed
       in any non-alphanumeric delimiters other than backslash, for example:

         /(a|bc)x+yz/

       White  space before the initial delimiter is ignored. A regular expres-
       sion may be continued over several input lines, in which case the  new-
       line  characters  are included within it. It is possible to include the
       delimiter within the pattern by escaping it, for example

         /abc\/def/

       If you do so, the escape and the delimiter form part  of  the  pattern,
       but  since delimiters are always non-alphanumeric, this does not affect
       its interpretation.  If the terminating delimiter is  immediately  fol-
       lowed by a backslash, for example,

         /abc/\

       then  a  backslash  is added to the end of the pattern. This is done to
       provide a way of testing the error condition that arises if  a  pattern
       finishes with a backslash, because

         /abc\/

       is  interpreted as the first line of a pattern that starts with "abc/",
       causing pcretest to read the next line as a continuation of the regular
       expression.


PATTERN MODIFIERS

       A  pattern may be followed by any number of modifiers, which are mostly
       single characters. Following Perl usage, these are  referred  to  below
       as,  for  example,  "the /i modifier", even though the delimiter of the
       pattern need not always be a slash, and no slash is used  when  writing
       modifiers.  Whitespace  may  appear between the final pattern delimiter
       and the first modifier, and between the modifiers themselves.

       The /i, /m, /s, and /x modifiers set the PCRE_CASELESS, PCRE_MULTILINE,
       PCRE_DOTALL,  or  PCRE_EXTENDED  options,  respectively, when pcre_com-
       pile() is called. These four modifier letters have the same  effect  as
       they do in Perl. For example:

         /caseless/i

       The following table shows additional modifiers for setting PCRE options
       that do not correspond to anything in Perl:

         /A              PCRE_ANCHORED
         /C              PCRE_AUTO_CALLOUT
         /E              PCRE_DOLLAR_ENDONLY
         /f              PCRE_FIRSTLINE
         /J              PCRE_DUPNAMES
         /N              PCRE_NO_AUTO_CAPTURE
         /U              PCRE_UNGREEDY
         /X              PCRE_EXTRA
         /<JS>           PCRE_JAVASCRIPT_COMPAT
         /<cr>           PCRE_NEWLINE_CR
         /<lf>           PCRE_NEWLINE_LF
         /<crlf>         PCRE_NEWLINE_CRLF
         /<anycrlf>      PCRE_NEWLINE_ANYCRLF
         /<any>          PCRE_NEWLINE_ANY
         /<bsr_anycrlf>  PCRE_BSR_ANYCRLF
         /<bsr_unicode>  PCRE_BSR_UNICODE

       Those specifying line ending sequences are literal  strings  as  shown,
       but  the  letters  can  be  in either case. This example sets multiline
       matching with CRLF as the line ending sequence:

         /^abc/m<crlf>

       Details of the meanings of these PCRE options are given in the  pcreapi
       documentation.

   Finding all matches in a string

       Searching  for  all  possible matches within each subject string can be
       requested by the /g or /G modifier. After  finding  a  match,  PCRE  is
       called again to search the remainder of the subject string. The differ-
       ence between /g and /G is that the former uses the startoffset argument
       to  pcre_exec()  to  start  searching  at a new point within the entire
       string (which is in effect what Perl does), whereas the  latter  passes
       over  a  shortened  substring.  This makes a difference to the matching
       process if the pattern begins with a lookbehind assertion (including \b
       or \B).

       If  any  call  to  pcre_exec()  in a /g or /G sequence matches an empty
       string, the next  call  is  done  with  the  PCRE_NOTEMPTY_ATSTART  and
       PCRE_ANCHORED  flags  set  in  order  to search for another, non-empty,
       match at the same point. If this second match fails, the  start  offset
       is  advanced  by  one  character, and the normal match is retried. This
       imitates the way Perl handles such cases when using the /g modifier  or
       the split() function.

   Other modifiers

       There are yet more modifiers for controlling the way pcretest operates.

       The  /+ modifier requests that as well as outputting the substring that
       matched the entire pattern, pcretest  should  in  addition  output  the
       remainder  of  the  subject  string. This is useful for tests where the
       subject contains multiple copies of the same substring.

       The /B modifier is a debugging feature. It requests that pcretest  out-
       put  a representation of the compiled byte code after compilation. Nor-
       mally this information contains length and offset values;  however,  if
       /Z  is also present, this data is replaced by spaces. This is a special
       feature for use in the automatic test scripts; it ensures that the same
       output is generated for different internal link sizes.

       The  /L modifier must be followed directly by the name of a locale, for
       example,

         /pattern/Lfr_FR

       For this reason, it must be the last modifier. The given locale is set,
       pcre_maketables()  is called to build a set of character tables for the
       locale, and this is then passed to pcre_compile()  when  compiling  the
       regular  expression.  Without  an  /L  modifier,  NULL is passed as the
       tables pointer; that is, /L applies only to the expression on which  it
       appears.

       The  /I  modifier  requests  that pcretest output information about the
       compiled pattern (whether it is anchored, has a fixed first  character,
       and  so  on). It does this by calling pcre_fullinfo() after compiling a
       pattern. If the pattern is studied, the results of that are  also  out-
       put.

       The  /D modifier is a PCRE debugging feature, and is equivalent to /BI,
       that is, both the /B and the /I modifiers.

       The /F modifier causes pcretest to flip the byte order of the fields in
       the  compiled  pattern  that  contain  2-byte  and 4-byte numbers. This
       facility is for testing the feature in PCRE that allows it  to  execute
       patterns that were compiled on a host with a different endianness. This
       feature is not available when the POSIX  interface  to  PCRE  is  being
       used,  that is, when the /P pattern modifier is specified. See also the
       section about saving and reloading compiled patterns below.

       The /S modifier causes pcre_study() to be called after  the  expression
       has been compiled, and the results used when the expression is matched.

       The /M modifier causes the size of the memory block used to hold the com-
       piled pattern to be output.

       The /P modifier causes pcretest to call PCRE via the POSIX wrapper  API
       rather  than  its  native  API.  When this is done, all other modifiers
       except /i, /m, and /+ are ignored. REG_ICASE is set if /i  is  present,
       and  REG_NEWLINE  is  set if /m is present. The wrapper functions force
       PCRE_DOLLAR_ENDONLY always, and PCRE_DOTALL unless REG_NEWLINE is set.

       The /8 modifier causes pcretest to call PCRE with the PCRE_UTF8  option
       set.  This  turns on support for UTF-8 character handling in PCRE, pro-
       vided that it was compiled with this  support  enabled.  This  modifier
       also causes any non-printing characters in output strings to be printed
       using the \x{hh...} notation if they are valid UTF-8 sequences.

       If the /? modifier  is  used  with  /8,  it  causes  pcretest  to  call
       pcre_compile()  with  the  PCRE_NO_UTF8_CHECK  option,  to suppress the
       checking of the string for UTF-8 validity.


DATA LINES

       Before each data line is passed to pcre_exec(),  leading  and  trailing
       whitespace  is  removed,  and it is then scanned for \ escapes. Some of
       these are pretty esoteric features, intended for checking out  some  of
       the  more  complicated features of PCRE. If you are just testing "ordi-
       nary" regular expressions, you probably don't need any  of  these.  The
       following escapes are recognized:

         \a         alarm (BEL, \x07)
         \b         backspace (\x08)
         \e         escape (\x1b)
         \f         formfeed (\x0c)
         \n         newline (\x0a)
         \qdd       set the PCRE_MATCH_LIMIT limit to dd
                      (any number of digits)
         \r         carriage return (\x0d)
         \t         tab (\x09)
         \v         vertical tab (\x0b)
         \nnn       octal character (up to 3 octal digits)
         \xhh       hexadecimal character (up to 2 hex digits)
         \x{hh...}  hexadecimal character, any number of digits
                      in UTF-8 mode
         \A         pass the PCRE_ANCHORED option to pcre_exec()
                      or pcre_dfa_exec()
         \B         pass the PCRE_NOTBOL option to pcre_exec()
                      or pcre_dfa_exec()
         \Cdd       call pcre_copy_substring() for substring dd
                      after a successful match (number less than 32)
         \Cname     call pcre_copy_named_substring() for substring
                      "name" after a successful match (name termin-
                      ated by next non-alphanumeric character)
         \C+        show the current captured substrings at callout
                      time
         \C-        do not supply a callout function
         \C!n       return 1 instead of 0 when callout number n is
                      reached
         \C!n!m     return 1 instead of 0 when callout number n is
                      reached for the mth time
         \C*n       pass the number n (may be negative) as callout
                      data; this is used as the callout return value
         \D         use the pcre_dfa_exec() match function
         \F         only shortest match for pcre_dfa_exec()
         \Gdd       call pcre_get_substring() for substring dd
                      after a successful match (number less than 32)
         \Gname     call pcre_get_named_substring() for substring
                      "name" after a successful match (name termin-
                      ated by next non-alphanumeric character)
         \L         call pcre_get_substringlist() after a
                      successful match
         \M         discover the minimum MATCH_LIMIT and
                      MATCH_LIMIT_RECURSION settings
         \N         pass the PCRE_NOTEMPTY option to pcre_exec()
                      or pcre_dfa_exec(); if used twice, pass the
                      PCRE_NOTEMPTY_ATSTART option
         \Odd       set the size of the output vector passed to
                      pcre_exec() to dd (any number of digits)
         \P         pass the PCRE_PARTIAL_SOFT option to pcre_exec()
                      or pcre_dfa_exec(); if used twice, pass the
                      PCRE_PARTIAL_HARD option
         \Qdd       set the PCRE_MATCH_LIMIT_RECURSION limit to dd
                      (any number of digits)
         \R         pass the PCRE_DFA_RESTART option to pcre_dfa_exec()
         \S         output details of memory get/free calls during matching
         \Y         pass the PCRE_NO_START_OPTIMIZE option to pcre_exec()
                      or pcre_dfa_exec()
         \Z         pass the PCRE_NOTEOL option to pcre_exec()
                      or pcre_dfa_exec()
         \?         pass the PCRE_NO_UTF8_CHECK option to
                      pcre_exec() or pcre_dfa_exec()
         \>dd       start the match at offset dd (any number of digits);
                      this sets the startoffset argument for pcre_exec()
                      or pcre_dfa_exec()
         \<cr>      pass the PCRE_NEWLINE_CR option to pcre_exec()
                      or pcre_dfa_exec()
         \<lf>      pass the PCRE_NEWLINE_LF option to pcre_exec()
                      or pcre_dfa_exec()
         \<crlf>    pass the PCRE_NEWLINE_CRLF option to pcre_exec()
                      or pcre_dfa_exec()
         \<anycrlf> pass the PCRE_NEWLINE_ANYCRLF option to pcre_exec()
                      or pcre_dfa_exec()
         \<any>     pass the PCRE_NEWLINE_ANY option to pcre_exec()
                      or pcre_dfa_exec()

       The  escapes  that  specify  line ending sequences are literal strings,
       exactly as shown. No more than one newline setting should be present in
       any data line.

       A  backslash  followed by anything else just escapes the anything else.
       If the very last character is a backslash, it is ignored. This gives  a
       way  of  passing  an empty line as data, since a real empty line termi-
       nates the data input.

       If \M is present, pcretest calls pcre_exec() several times,  with  dif-
       ferent  values  in  the match_limit and match_limit_recursion fields of
       the pcre_extra data structure, until it finds the minimum  numbers  for
       each parameter that allow pcre_exec() to complete. The match_limit num-
       ber is a measure of the amount of backtracking that  takes  place,  and
       checking it out can be instructive. For most simple matches, the number
       is quite small, but for patterns with very large  numbers  of  matching
       possibilities,  it can become large very quickly with increasing length
       of subject string. The match_limit_recursion number is a measure of how
       much  stack  (or,  if  PCRE is compiled with NO_RECURSE, how much heap)
       memory is needed to complete the match attempt.

       When \O is used, the value specified may be higher or  lower  than  the
       size set by the -O command line option (or defaulted to 45); \O applies
       only to the call of pcre_exec() for the line in which it appears.

       If the /P modifier was present on the pattern, causing the POSIX  wrap-
       per  API  to  be  used, the only option-setting sequences that have any
       effect are \B and \Z, causing REG_NOTBOL and REG_NOTEOL,  respectively,
       to be passed to regexec().

       The  use of \x{hh...} to represent UTF-8 characters is not dependent on
       the use of the /8 modifier on the pattern.  It  is  recognized  always.
       There  may  be  any number of hexadecimal digits inside the braces. The
       result is from one to six bytes,  encoded  according  to  the  original
       UTF-8  rules  of  RFC  2279.  This  allows for values in the range 0 to
       0x7FFFFFFF. Note that not all of those are valid Unicode  code  points,
       or  indeed  valid  UTF-8 characters according to the later rules in RFC
       3629.


THE ALTERNATIVE MATCHING FUNCTION

       By  default,  pcretest  uses  the  standard  PCRE  matching   function,
       pcre_exec() to match each data line. From release 6.0, PCRE supports an
       alternative matching function, pcre_dfa_exec(),  which  operates  in  a
       different  way,  and has some restrictions. The differences between the
       two functions are described in the pcrematching documentation.

       If a data line contains the \D escape sequence, or if the command  line
       contains  the -dfa option, the alternative matching function is called.
       This function finds all possible matches at a given point. If, however,
       the  \F escape sequence is present in the data line, it stops after the
       first match is found. This is always the shortest possible match.


DEFAULT OUTPUT FROM PCRETEST

       This section describes the output when the  normal  matching  function,
       pcre_exec(), is being used.

       When a match succeeds, pcretest outputs the list of captured substrings
       that pcre_exec() returns, starting with number 0 for  the  string  that
       matched  the  whole  pattern. Otherwise, it outputs "No match" when the
       return is PCRE_ERROR_NOMATCH, and "Partial match:" followed by the par-
       tially  matching substring when pcre_exec() returns PCRE_ERROR_PARTIAL.
       For any other returns, it outputs the PCRE negative error number.  Here
       is an example of an interactive pcretest run.

         $ pcretest
         PCRE version 7.0 30-Nov-2006

           re> /^abc(\d+)/
         data> abc123
          0: abc123
          1: 123
         data> xyz
         No match

       Note  that unset capturing substrings that are not followed by one that
       is set are not returned by pcre_exec(), and are not shown by  pcretest.
       In  the following example, there are two capturing substrings, but when
       the first data line is matched, the  second,  unset  substring  is  not
       shown.  An "internal" unset substring is shown as "<unset>", as for the
       second data line.

           re> /(a)|(b)/
         data> a
          0: a
          1: a
         data> b
          0: b
          1: <unset>
          2: b

       If the strings contain any non-printing characters, they are output  as
       \x  escapes,  or  as  \x{...} escapes if the /8 modifier was present on
       the pattern. See below for the definition of  non-printing  characters.
       If  the pattern has the /+ modifier, the output for substring 0 is fol-
       lowed by the rest of the subject  string,  identified  by  "0+"  like
       this:

           re> /cat/+
         data> cataract
          0: cat
          0+ aract

       If  the  pattern  has  the /g or /G modifier, the results of successive
       matching attempts are output in sequence, like this:

           re> /\Bi(\w\w)/g
         data> Mississippi
          0: iss
          1: ss
          0: iss
          1: ss
          0: ipp
          1: pp

       "No match" is output only if the first match attempt fails.

       If any of the sequences \C, \G, or \L are present in a data  line  that
       is  successfully  matched,  the substrings extracted by the convenience
       functions are output with C, G, or L after the string number instead of
       a colon. This is in addition to the normal full list. The string length
       (that is, the return from the extraction function) is given  in  paren-
       theses after each string for \C and \G.

       Note that whereas patterns can be continued over several lines (a plain
       ">" prompt is used for continuations), data lines may not. However new-
       lines  can  be included in data by means of the \n escape (or \r, \r\n,
       etc., depending on the newline sequence setting).


OUTPUT FROM THE ALTERNATIVE MATCHING FUNCTION

       When the alternative matching function, pcre_dfa_exec(),  is  used  (by
       means  of  the \D escape sequence or the -dfa command line option), the
       output consists of a list of all the matches that start  at  the  first
       point in the subject where there is at least one match. For example:

           re> /(tang|tangerine|tan)/
         data> yellow tangerine\D
          0: tangerine
          1: tang
          2: tan

       (Using  the  normal  matching function on this data finds only "tang".)
       The longest matching string is always given first (and numbered  zero).
       After a PCRE_ERROR_PARTIAL return, the output is "Partial match:", fol-
       lowed by the partially matching substring.

       If /g is present on the pattern, the search for further matches resumes
       at the end of the longest match. For example:

           re> /(tang|tangerine|tan)/g
         data> yellow tangerine and tangy sultana\D
          0: tangerine
          1: tang
          2: tan
          0: tang
          1: tan
          0: tan

       Since  the  matching  function  does not support substring capture, the
       escape sequences that are concerned with captured  substrings  are  not
       relevant.


RESTARTING AFTER A PARTIAL MATCH

       When the alternative matching function has given the PCRE_ERROR_PARTIAL
       return, indicating that the subject partially matched the pattern,  you
       can  restart  the match with additional subject data by means of the \R
       escape sequence. For example:

           re> /^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$/
         data> 23ja\P\D
         Partial match: 23ja
         data> n05\R\D
          0: n05

       For further information about partial  matching,  see  the  pcrepartial
       documentation.


CALLOUTS

       If  the pattern contains any callout requests, pcretest's callout func-
       tion is called during matching. This works  with  both  matching  func-
       tions. By default, the called function displays the callout number, the
       start and current positions in the text at the callout  time,  and  the
       next pattern item to be tested. For example, the output

         --->pqrabcdef
           0    ^  ^     \d

       indicates  that  callout number 0 occurred for a match attempt starting
       at the fourth character of the subject string, when the pointer was  at
       the  seventh  character of the data, and when the next pattern item was
       \d. Just one circumflex is output if the start  and  current  positions
       are the same.

       Callouts numbered 255 are assumed to be automatic callouts, inserted as
       a result of the /C pattern modifier. In this case, instead  of  showing
       the  callout  number, the offset in the pattern, preceded by a plus, is
       output. For example:

           re> /\d?[A-E]\*/C
         data> E*
         --->E*
          +0 ^      \d?
          +3 ^      [A-E]
          +8 ^^     \*
         +10 ^ ^
          0: E*

       The callout function in pcretest returns zero (carry  on  matching)  by
       default,  but you can use a \C item in a data line (as described above)
       to change this.

       Inserting callouts can be helpful when using pcretest to check  compli-
       cated  regular expressions. For further information about callouts, see
       the pcrecallout documentation.


NON-PRINTING CHARACTERS

       When pcretest is outputting text in the compiled version of a  pattern,
       bytes  other  than 32-126 are always treated as non-printing characters
       and are therefore shown as hex escapes.

       When pcretest is outputting text that is a matched part  of  a  subject
       string,  it behaves in the same way, unless a different locale has been
       set for the  pattern  (using  the  /L  modifier).  In  this  case,  the
       isprint() function is used to distinguish  printing  and  non-printing
       characters.


SAVING AND RELOADING COMPILED PATTERNS

       The  facilities  described  in  this section are not available when the
       POSIX interface to PCRE is being used, that is, when the /P pattern mod-
       ifier is specified.

       When the POSIX interface is not in use, you can cause pcretest to write
       a compiled pattern to a file, by following the modifiers with >  and  a
       file name.  For example:

         /pattern/im >/some/file

       See  the pcreprecompile documentation for a discussion about saving and
       re-using compiled patterns.

       The data that is written is binary.  The  first  eight  bytes  are  the
       length  of  the  compiled  pattern  data  followed by the length of the
       optional study data, each written as four  bytes  in  big-endian  order
       (most  significant  byte  first). If there is no study data (either the
       pattern was not studied, or studying did not return any data), the sec-
       ond  length  is  zero. The lengths are followed by an exact copy of the
       compiled pattern. If there is additional study data, this follows imme-
       diately  after  the  compiled pattern. After writing the file, pcretest
       expects to read a new pattern.

       A saved pattern can be reloaded into pcretest by specifying < and a file
       name  instead  of  a pattern. The name of the file must not contain a <
       character, as otherwise pcretest will interpret the line as  a  pattern
       delimited by < characters.  For example:

          re> </some/file
         Compiled regex loaded from /some/file
         No study data

       When  the pattern has been loaded, pcretest proceeds to read data lines
       in the usual way.

       You can copy a file written by pcretest to a different host and  reload
       it  there,  even  if the new host has opposite endianness to the one on
       which the pattern was compiled. For example, you can compile on an  i86
       machine and run on a SPARC machine.

       File  names  for  saving and reloading can be absolute or relative, but
       note that the shell facility of expanding a file name that starts  with
       a tilde (~) is not available.

       The  ability to save and reload files in pcretest is intended for test-
       ing and experimentation. It is not intended for production use  because
       only  a  single pattern can be written to a file. Furthermore, there is
       no facility for supplying  custom  character  tables  for  use  with  a
       reloaded  pattern.  If  the  original  pattern was compiled with custom
       tables, an attempt to match a subject string using a  reloaded  pattern
       is  likely to cause pcretest to crash.  Finally, if you attempt to load
       a file that is not in the correct format, the result is undefined.


SEE ALSO

       pcre(3), pcreapi(3), pcrecallout(3),  pcrematching(3),  pcrepartial(3),
       pcrepattern(3), pcreprecompile(3).


AUTHOR

       Philip Hazel
       University Computing Service
       Cambridge CB2 3QH, England.


REVISION

       Last updated: 26 September 2009
       Copyright (c) 1997-2009 University of Cambridge.
usr/share/doc/alt-pcre802-devel/perltest.txt000064400000003104150403561530014665 0ustar00The perltest program
--------------------

The perltest.pl script tests Perl's regular expressions; it has the same
specification as pcretest, and so can be given identical input, except that
input patterns can be followed only by Perl's lower case modifiers and /+ (as
used by pcretest), which is recognized and handled by the program.

The data lines are processed as Perl double-quoted strings, so if they contain
" $ or @ characters, these have to be escaped. For this reason, all such
characters in testinput1, testinput4, testinput6, and testinput11 are escaped
so that they can be used for perltest as well as for pcretest. The special
upper case pattern modifiers such as /A that pcretest recognizes, and its
special data line escapes, are not used in these files. The output should be
identical, apart from the initial identifying banner.

The perltest.pl script can also test UTF-8 features. It recognizes the special
modifier /8 that pcretest uses to invoke UTF-8 functionality. The testinput4
and testinput6 files can be fed to perltest to run compatible UTF-8 tests.
However, it is necessary to add "use utf8;" to the script to make this work
correctly.

The testinput11 file contains tests that use features of Perl 5.10, so does not
work with Perl 5.8.

The other testinput files are not suitable for feeding to perltest.pl, since
they make use of the special upper case modifiers and escapes that pcretest
uses to test some features of PCRE. Some of these files also contain malformed
regular expressions, in order to check that PCRE diagnoses them correctly.

Philip Hazel
October 2009