| Current File : /home/mmdealscpanel/yummmdeals.com/pcre802.tar |
usr/include/pcrecpparg.h 0000644 00000015177 15040356140 0011310 0 ustar 00 // Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Sanjay Ghemawat
#ifndef _PCRECPPARG_H
#define _PCRECPPARG_H
#include <stdlib.h> // for NULL
#include <string>
#include <pcre.h>
namespace pcrecpp {
class StringPiece;
// Hex/Octal/Binary?
// Special class for parsing into objects that define a ParseFrom() method
template <class T>
class _RE_MatchObject {
public:
static inline bool Parse(const char* str, int n, void* dest) {
if (dest == NULL) return true;
T* object = reinterpret_cast<T*>(dest);
return object->ParseFrom(str, n);
}
};
class PCRECPP_EXP_DEFN Arg {
public:
// Empty constructor so we can declare arrays of Arg
Arg();
// Constructor specially designed for NULL arguments
Arg(void*);
typedef bool (*Parser)(const char* str, int n, void* dest);
// Type-specific parsers
#define PCRE_MAKE_PARSER(type,name) \
Arg(type* p) : arg_(p), parser_(name) { } \
Arg(type* p, Parser parser) : arg_(p), parser_(parser) { }
PCRE_MAKE_PARSER(char, parse_char);
PCRE_MAKE_PARSER(unsigned char, parse_uchar);
PCRE_MAKE_PARSER(short, parse_short);
PCRE_MAKE_PARSER(unsigned short, parse_ushort);
PCRE_MAKE_PARSER(int, parse_int);
PCRE_MAKE_PARSER(unsigned int, parse_uint);
PCRE_MAKE_PARSER(long, parse_long);
PCRE_MAKE_PARSER(unsigned long, parse_ulong);
#if 1
PCRE_MAKE_PARSER(long long, parse_longlong);
#endif
#if 1
PCRE_MAKE_PARSER(unsigned long long, parse_ulonglong);
#endif
PCRE_MAKE_PARSER(float, parse_float);
PCRE_MAKE_PARSER(double, parse_double);
PCRE_MAKE_PARSER(std::string, parse_string);
PCRE_MAKE_PARSER(StringPiece, parse_stringpiece);
#undef PCRE_MAKE_PARSER
// Generic constructor
template <class T> Arg(T*, Parser parser);
// Generic constructor template
template <class T> Arg(T* p)
: arg_(p), parser_(_RE_MatchObject<T>::Parse) {
}
// Parse the data
bool Parse(const char* str, int n) const;
private:
void* arg_;
Parser parser_;
static bool parse_null (const char* str, int n, void* dest);
static bool parse_char (const char* str, int n, void* dest);
static bool parse_uchar (const char* str, int n, void* dest);
static bool parse_float (const char* str, int n, void* dest);
static bool parse_double (const char* str, int n, void* dest);
static bool parse_string (const char* str, int n, void* dest);
static bool parse_stringpiece (const char* str, int n, void* dest);
#define PCRE_DECLARE_INTEGER_PARSER(name) \
private: \
static bool parse_ ## name(const char* str, int n, void* dest); \
static bool parse_ ## name ## _radix( \
const char* str, int n, void* dest, int radix); \
public: \
static bool parse_ ## name ## _hex(const char* str, int n, void* dest); \
static bool parse_ ## name ## _octal(const char* str, int n, void* dest); \
static bool parse_ ## name ## _cradix(const char* str, int n, void* dest)
PCRE_DECLARE_INTEGER_PARSER(short);
PCRE_DECLARE_INTEGER_PARSER(ushort);
PCRE_DECLARE_INTEGER_PARSER(int);
PCRE_DECLARE_INTEGER_PARSER(uint);
PCRE_DECLARE_INTEGER_PARSER(long);
PCRE_DECLARE_INTEGER_PARSER(ulong);
PCRE_DECLARE_INTEGER_PARSER(longlong);
PCRE_DECLARE_INTEGER_PARSER(ulonglong);
#undef PCRE_DECLARE_INTEGER_PARSER
};
inline Arg::Arg() : arg_(NULL), parser_(parse_null) { }
inline Arg::Arg(void* p) : arg_(p), parser_(parse_null) { }
inline bool Arg::Parse(const char* str, int n) const {
return (*parser_)(str, n, arg_);
}
// This part of the parser, appropriate only for ints, deals with bases
#define MAKE_INTEGER_PARSER(type, name) \
inline Arg Hex(type* ptr) { \
return Arg(ptr, Arg::parse_ ## name ## _hex); } \
inline Arg Octal(type* ptr) { \
return Arg(ptr, Arg::parse_ ## name ## _octal); } \
inline Arg CRadix(type* ptr) { \
return Arg(ptr, Arg::parse_ ## name ## _cradix); }
MAKE_INTEGER_PARSER(short, short) /* */
MAKE_INTEGER_PARSER(unsigned short, ushort) /* */
MAKE_INTEGER_PARSER(int, int) /* Don't use semicolons */
MAKE_INTEGER_PARSER(unsigned int, uint) /* after these statement */
MAKE_INTEGER_PARSER(long, long) /* because they can cause */
MAKE_INTEGER_PARSER(unsigned long, ulong) /* compiler warnings if */
#if 1 /* the checking level is */
MAKE_INTEGER_PARSER(long long, longlong) /* turned up high enough. */
#endif /* */
#if 1 /* */
MAKE_INTEGER_PARSER(unsigned long long, ulonglong) /* */
#endif
#undef PCRE_IS_SET
#undef PCRE_SET_OR_CLEAR
#undef MAKE_INTEGER_PARSER
} // namespace pcrecpp
#endif /* _PCRECPPARG_H */
usr/include/pcreposix.h 0000644 00000012400 15040356140 0011160 0 ustar 00 /*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
#ifndef _PCREPOSIX_H
#define _PCREPOSIX_H
/* This is the header for the POSIX wrapper interface to the PCRE Perl-
Compatible Regular Expression library. It defines the things POSIX says should
be there. I hope.
Copyright (c) 1997-2009 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
/* Have to include stdlib.h in order to ensure that size_t is defined. */
#include <stdlib.h>
/* Allow for C++ users */
#ifdef __cplusplus
extern "C" {
#endif
/* Options, mostly defined by POSIX, but with some extras. */
#define REG_ICASE 0x0001 /* Maps to PCRE_CASELESS */
#define REG_NEWLINE 0x0002 /* Maps to PCRE_MULTILINE */
#define REG_NOTBOL 0x0004 /* Maps to PCRE_NOTBOL */
#define REG_NOTEOL 0x0008 /* Maps to PCRE_NOTEOL */
#define REG_DOTALL 0x0010 /* NOT defined by POSIX; maps to PCRE_DOTALL */
#define REG_NOSUB 0x0020 /* Maps to PCRE_NO_AUTO_CAPTURE */
#define REG_UTF8 0x0040 /* NOT defined by POSIX; maps to PCRE_UTF8 */
#define REG_STARTEND 0x0080 /* BSD feature: pass subject string by so,eo */
#define REG_NOTEMPTY 0x0100 /* NOT defined by POSIX; maps to PCRE_NOTEMPTY */
#define REG_UNGREEDY 0x0200 /* NOT defined by POSIX; maps to PCRE_UNGREEDY */
/* This is not used by PCRE, but by defining it we make it easier
to slot PCRE into existing programs that make POSIX calls. */
#define REG_EXTENDED 0
/* Error values. Not all these are relevant or used by the wrapper. */
enum {
REG_ASSERT = 1, /* internal error ? */
REG_BADBR, /* invalid repeat counts in {} */
REG_BADPAT, /* pattern error */
REG_BADRPT, /* ? * + invalid */
REG_EBRACE, /* unbalanced {} */
REG_EBRACK, /* unbalanced [] */
REG_ECOLLATE, /* collation error - not relevant */
REG_ECTYPE, /* bad class */
REG_EESCAPE, /* bad escape sequence */
REG_EMPTY, /* empty expression */
REG_EPAREN, /* unbalanced () */
REG_ERANGE, /* bad range inside [] */
REG_ESIZE, /* expression too big */
REG_ESPACE, /* failed to get memory */
REG_ESUBREG, /* bad back reference */
REG_INVARG, /* bad argument */
REG_NOMATCH /* match failed */
};
/* The structure representing a compiled regular expression. */
typedef struct {
void *re_pcre;
size_t re_nsub;
size_t re_erroffset;
} regex_t;
/* The structure in which a captured offset is returned. */
typedef int regoff_t;
typedef struct {
regoff_t rm_so;
regoff_t rm_eo;
} regmatch_t;
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
export settings are needed, and are set in pcreposix.c before including this
file. */
#if defined(_WIN32) && !defined(PCRE_STATIC) && !defined(PCREPOSIX_EXP_DECL)
# define PCREPOSIX_EXP_DECL extern __declspec(dllimport)
# define PCREPOSIX_EXP_DEFN __declspec(dllimport)
#endif
/* By default, we use the standard "extern" declarations. */
#ifndef PCREPOSIX_EXP_DECL
# ifdef __cplusplus
# define PCREPOSIX_EXP_DECL extern "C"
# define PCREPOSIX_EXP_DEFN extern "C"
# else
# define PCREPOSIX_EXP_DECL extern
# define PCREPOSIX_EXP_DEFN extern
# endif
#endif
/* The functions */
PCREPOSIX_EXP_DECL int regcomp(regex_t *, const char *, int);
PCREPOSIX_EXP_DECL int regexec(const regex_t *, const char *, size_t,
regmatch_t *, int);
PCREPOSIX_EXP_DECL size_t regerror(int, const regex_t *, char *, size_t);
PCREPOSIX_EXP_DECL void regfree(regex_t *);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* End of pcreposix.h */
usr/include/pcrecpp.h 0000644 00000063641 15040356141 0010616 0 ustar 00 // Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Sanjay Ghemawat
// Support for PCRE_XXX modifiers added by Giuseppe Maxia, July 2005
#ifndef _PCRECPP_H
#define _PCRECPP_H
// C++ interface to the pcre regular-expression library. RE supports
// Perl-style regular expressions (with extensions like \d, \w, \s,
// ...).
//
// -----------------------------------------------------------------------
// REGEXP SYNTAX:
//
// This module is part of the pcre library and hence supports its syntax
// for regular expressions.
//
// The syntax is pretty similar to Perl's. For those not familiar
// with Perl's regular expressions, here are some examples of the most
// commonly used extensions:
//
// "hello (\\w+) world" -- \w matches a "word" character
// "version (\\d+)" -- \d matches a digit
// "hello\\s+world" -- \s matches any whitespace character
// "\\b(\\w+)\\b" -- \b matches empty string at a word boundary
// "(?i)hello" -- (?i) turns on case-insensitive matching
// "/\\*(.*?)\\*/" -- .*? matches . minimum no. of times possible
//
// -----------------------------------------------------------------------
// MATCHING INTERFACE:
//
// The "FullMatch" operation checks that supplied text matches a
// supplied pattern exactly.
//
// Example: successful match
// pcrecpp::RE re("h.*o");
// re.FullMatch("hello");
//
// Example: unsuccessful match (requires full match):
// pcrecpp::RE re("e");
// !re.FullMatch("hello");
//
// Example: creating a temporary RE object:
// pcrecpp::RE("h.*o").FullMatch("hello");
//
// You can pass in a "const char*" or a "string" for "text". The
// examples below tend to use a const char*.
//
// You can, as in the different examples above, store the RE object
// explicitly in a variable or use a temporary RE object. The
// examples below use one mode or the other arbitrarily. Either
// could correctly be used for any of these examples.
//
// -----------------------------------------------------------------------
// MATCHING WITH SUB-STRING EXTRACTION:
//
// You can supply extra pointer arguments to extract matched subpieces.
//
// Example: extracts "ruby" into "s" and 1234 into "i"
// int i;
// string s;
// pcrecpp::RE re("(\\w+):(\\d+)");
// re.FullMatch("ruby:1234", &s, &i);
//
// Example: does not try to extract any extra sub-patterns
// re.FullMatch("ruby:1234", &s);
//
// Example: does not try to extract into NULL
// re.FullMatch("ruby:1234", NULL, &i);
//
// Example: integer overflow causes failure
// !re.FullMatch("ruby:1234567891234", NULL, &i);
//
// Example: fails because there aren't enough sub-patterns:
// !pcrecpp::RE("\\w+:\\d+").FullMatch("ruby:1234", &s);
//
// Example: fails because string cannot be stored in integer
// !pcrecpp::RE("(.*)").FullMatch("ruby", &i);
//
// The provided pointer arguments can be pointers to any scalar numeric
// type, or one of
// string (matched piece is copied to string)
// StringPiece (StringPiece is mutated to point to matched piece)
// T (where "bool T::ParseFrom(const char*, int)" exists)
// NULL (the corresponding matched sub-pattern is not copied)
//
// CAVEAT: An optional sub-pattern that does not exist in the matched
// string is assigned the empty string. Therefore, the following will
// return false (because the empty string is not a valid number):
// int number;
// pcrecpp::RE::FullMatch("abc", "[a-z]+(\\d+)?", &number);
//
// -----------------------------------------------------------------------
// DO_MATCH
//
// The matching interface supports at most 16 arguments per call.
// If you need more, consider using the more general interface
// pcrecpp::RE::DoMatch(). See pcrecpp.h for the signature for DoMatch.
//
// -----------------------------------------------------------------------
// PARTIAL MATCHES
//
// You can use the "PartialMatch" operation when you want the pattern
// to match any substring of the text.
//
// Example: simple search for a string:
// pcrecpp::RE("ell").PartialMatch("hello");
//
// Example: find first number in a string:
// int number;
// pcrecpp::RE re("(\\d+)");
// re.PartialMatch("x*100 + 20", &number);
// assert(number == 100);
//
// -----------------------------------------------------------------------
// UTF-8 AND THE MATCHING INTERFACE:
//
// By default, pattern and text are plain text, one byte per character.
// The UTF8 flag, passed to the constructor, causes both pattern
// and string to be treated as UTF-8 text, still a byte stream but
// potentially multiple bytes per character. In practice, the text
// is likelier to be UTF-8 than the pattern, but the match returned
// may depend on the UTF8 flag, so always use it when matching
// UTF8 text. E.g., "." will match one byte normally but with UTF8
// set may match up to three bytes of a multi-byte character.
//
// Example:
// pcrecpp::RE_Options options;
// options.set_utf8();
// pcrecpp::RE re(utf8_pattern, options);
// re.FullMatch(utf8_string);
//
// Example: using the convenience function UTF8():
// pcrecpp::RE re(utf8_pattern, pcrecpp::UTF8());
// re.FullMatch(utf8_string);
//
// NOTE: The UTF8 option is ignored if pcre was not configured with the
// --enable-utf8 flag.
//
// -----------------------------------------------------------------------
// PASSING MODIFIERS TO THE REGULAR EXPRESSION ENGINE
//
// PCRE defines some modifiers to change the behavior of the regular
// expression engine.
// The C++ wrapper defines an auxiliary class, RE_Options, as a vehicle
// to pass such modifiers to a RE class.
//
// Currently, the following modifiers are supported
//
// modifier description Perl corresponding
//
// PCRE_CASELESS case insensitive match /i
// PCRE_MULTILINE multiple lines match /m
// PCRE_DOTALL dot matches newlines /s
// PCRE_DOLLAR_ENDONLY $ matches only at end N/A
// PCRE_EXTRA strict escape parsing N/A
// PCRE_EXTENDED ignore whitespaces /x
// PCRE_UTF8 handles UTF8 chars built-in
// PCRE_UNGREEDY reverses * and *? N/A
// PCRE_NO_AUTO_CAPTURE disables matching parens N/A (*)
//
// (For a full account on how each modifier works, please check the
// PCRE API reference manual).
//
// (*) Both Perl and PCRE allow non matching parentheses by means of the
// "?:" modifier within the pattern itself. e.g. (?:ab|cd) does not
// capture, while (ab|cd) does.
//
// For each modifier, there are two member functions whose name is made
// out of the modifier in lowercase, without the "PCRE_" prefix. For
// instance, PCRE_CASELESS is handled by
// bool caseless(),
// which returns true if the modifier is set, and
// RE_Options & set_caseless(bool),
// which sets or unsets the modifier.
//
// Moreover, PCRE_EXTRA_MATCH_LIMIT can be accessed through the
// set_match_limit() and match_limit() member functions.
// Setting match_limit to a non-zero value will limit the executation of
// pcre to keep it from doing bad things like blowing the stack or taking
// an eternity to return a result. A value of 5000 is good enough to stop
// stack blowup in a 2MB thread stack. Setting match_limit to zero will
// disable match limiting. Alternately, you can set match_limit_recursion()
// which uses PCRE_EXTRA_MATCH_LIMIT_RECURSION to limit how much pcre
// recurses. match_limit() caps the number of matches pcre does;
// match_limit_recrusion() caps the depth of recursion.
//
// Normally, to pass one or more modifiers to a RE class, you declare
// a RE_Options object, set the appropriate options, and pass this
// object to a RE constructor. Example:
//
// RE_options opt;
// opt.set_caseless(true);
//
// if (RE("HELLO", opt).PartialMatch("hello world")) ...
//
// RE_options has two constructors. The default constructor takes no
// arguments and creates a set of flags that are off by default.
//
// The optional parameter 'option_flags' is to facilitate transfer
// of legacy code from C programs. This lets you do
// RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
//
// But new code is better off doing
// RE(pattern,
// RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
// (See below)
//
// If you are going to pass one of the most used modifiers, there are some
// convenience functions that return a RE_Options class with the
// appropriate modifier already set:
// CASELESS(), UTF8(), MULTILINE(), DOTALL(), EXTENDED()
//
// If you need to set several options at once, and you don't want to go
// through the pains of declaring a RE_Options object and setting several
// options, there is a parallel method that give you such ability on the
// fly. You can concatenate several set_xxxxx member functions, since each
// of them returns a reference to its class object. e.g.: to pass
// PCRE_CASELESS, PCRE_EXTENDED, and PCRE_MULTILINE to a RE with one
// statement, you may write
//
// RE(" ^ xyz \\s+ .* blah$", RE_Options()
// .set_caseless(true)
// .set_extended(true)
// .set_multiline(true)).PartialMatch(sometext);
//
// -----------------------------------------------------------------------
// SCANNING TEXT INCREMENTALLY
//
// The "Consume" operation may be useful if you want to repeatedly
// match regular expressions at the front of a string and skip over
// them as they match. This requires use of the "StringPiece" type,
// which represents a sub-range of a real string. Like RE, StringPiece
// is defined in the pcrecpp namespace.
//
// Example: read lines of the form "var = value" from a string.
// string contents = ...; // Fill string somehow
// pcrecpp::StringPiece input(contents); // Wrap in a StringPiece
//
// string var;
// int value;
// pcrecpp::RE re("(\\w+) = (\\d+)\n");
// while (re.Consume(&input, &var, &value)) {
// ...;
// }
//
// Each successful call to "Consume" will set "var/value", and also
// advance "input" so it points past the matched text.
//
// The "FindAndConsume" operation is similar to "Consume" but does not
// anchor your match at the beginning of the string. For example, you
// could extract all words from a string by repeatedly calling
// pcrecpp::RE("(\\w+)").FindAndConsume(&input, &word)
//
// -----------------------------------------------------------------------
// PARSING HEX/OCTAL/C-RADIX NUMBERS
//
// By default, if you pass a pointer to a numeric value, the
// corresponding text is interpreted as a base-10 number. You can
// instead wrap the pointer with a call to one of the operators Hex(),
// Octal(), or CRadix() to interpret the text in another base. The
// CRadix operator interprets C-style "0" (base-8) and "0x" (base-16)
// prefixes, but defaults to base-10.
//
// Example:
// int a, b, c, d;
// pcrecpp::RE re("(.*) (.*) (.*) (.*)");
// re.FullMatch("100 40 0100 0x40",
// pcrecpp::Octal(&a), pcrecpp::Hex(&b),
// pcrecpp::CRadix(&c), pcrecpp::CRadix(&d));
// will leave 64 in a, b, c, and d.
//
// -----------------------------------------------------------------------
// REPLACING PARTS OF STRINGS
//
// You can replace the first match of "pattern" in "str" with
// "rewrite". Within "rewrite", backslash-escaped digits (\1 to \9)
// can be used to insert text matching corresponding parenthesized
// group from the pattern. \0 in "rewrite" refers to the entire
// matching text. E.g.,
//
// string s = "yabba dabba doo";
// pcrecpp::RE("b+").Replace("d", &s);
//
// will leave "s" containing "yada dabba doo". The result is true if
// the pattern matches and a replacement occurs, or false otherwise.
//
// GlobalReplace() is like Replace(), except that it replaces all
// occurrences of the pattern in the string with the rewrite.
// Replacements are not subject to re-matching. E.g.,
//
// string s = "yabba dabba doo";
// pcrecpp::RE("b+").GlobalReplace("d", &s);
//
// will leave "s" containing "yada dada doo". It returns the number
// of replacements made.
//
// Extract() is like Replace(), except that if the pattern matches,
// "rewrite" is copied into "out" (an additional argument) with
// substitutions. The non-matching portions of "text" are ignored.
// Returns true iff a match occurred and the extraction happened
// successfully. If no match occurs, the string is left unaffected.
#include <string>
#include <pcre.h>
#include <pcrecpparg.h> // defines the Arg class
// This isn't technically needed here, but we include it
// anyway so folks who include pcrecpp.h don't have to.
#include <pcre_stringpiece.h>
namespace pcrecpp {
#define PCRE_SET_OR_CLEAR(b, o) \
if (b) all_options_ |= (o); else all_options_ &= ~(o); \
return *this
#define PCRE_IS_SET(o) \
(all_options_ & o) == o
/***** Compiling regular expressions: the RE class *****/
// RE_Options allow you to set options to be passed along to pcre,
// along with other options we put on top of pcre.
// Only 9 modifiers, plus match_limit and match_limit_recursion,
// are supported now.
class PCRECPP_EXP_DEFN RE_Options {
public:
// constructor
RE_Options() : match_limit_(0), match_limit_recursion_(0), all_options_(0) {}
// alternative constructor.
// To facilitate transfer of legacy code from C programs
//
// This lets you do
// RE(pattern, RE_Options(PCRE_CASELESS|PCRE_MULTILINE)).PartialMatch(str);
// But new code is better off doing
// RE(pattern,
// RE_Options().set_caseless(true).set_multiline(true)).PartialMatch(str);
RE_Options(int option_flags) : match_limit_(0), match_limit_recursion_(0),
all_options_(option_flags) {}
// we're fine with the default destructor, copy constructor, etc.
// accessors and mutators
int match_limit() const { return match_limit_; };
RE_Options &set_match_limit(int limit) {
match_limit_ = limit;
return *this;
}
int match_limit_recursion() const { return match_limit_recursion_; };
RE_Options &set_match_limit_recursion(int limit) {
match_limit_recursion_ = limit;
return *this;
}
bool caseless() const {
return PCRE_IS_SET(PCRE_CASELESS);
}
RE_Options &set_caseless(bool x) {
PCRE_SET_OR_CLEAR(x, PCRE_CASELESS);
}
bool multiline() const {
return PCRE_IS_SET(PCRE_MULTILINE);
}
RE_Options &set_multiline(bool x) {
PCRE_SET_OR_CLEAR(x, PCRE_MULTILINE);
}
bool dotall() const {
return PCRE_IS_SET(PCRE_DOTALL);
}
RE_Options &set_dotall(bool x) {
PCRE_SET_OR_CLEAR(x, PCRE_DOTALL);
}
bool extended() const {
return PCRE_IS_SET(PCRE_EXTENDED);
}
RE_Options &set_extended(bool x) {
PCRE_SET_OR_CLEAR(x, PCRE_EXTENDED);
}
bool dollar_endonly() const {
return PCRE_IS_SET(PCRE_DOLLAR_ENDONLY);
}
RE_Options &set_dollar_endonly(bool x) {
PCRE_SET_OR_CLEAR(x, PCRE_DOLLAR_ENDONLY);
}
bool extra() const {
return PCRE_IS_SET(PCRE_EXTRA);
}
RE_Options &set_extra(bool x) {
PCRE_SET_OR_CLEAR(x, PCRE_EXTRA);
}
bool ungreedy() const {
return PCRE_IS_SET(PCRE_UNGREEDY);
}
RE_Options &set_ungreedy(bool x) {
PCRE_SET_OR_CLEAR(x, PCRE_UNGREEDY);
}
bool utf8() const {
return PCRE_IS_SET(PCRE_UTF8);
}
RE_Options &set_utf8(bool x) {
PCRE_SET_OR_CLEAR(x, PCRE_UTF8);
}
bool no_auto_capture() const {
return PCRE_IS_SET(PCRE_NO_AUTO_CAPTURE);
}
RE_Options &set_no_auto_capture(bool x) {
PCRE_SET_OR_CLEAR(x, PCRE_NO_AUTO_CAPTURE);
}
RE_Options &set_all_options(int opt) {
all_options_ = opt;
return *this;
}
int all_options() const {
return all_options_ ;
}
// TODO: add other pcre flags
private:
int match_limit_;
int match_limit_recursion_;
int all_options_;
};
// These functions return some common RE_Options
static inline RE_Options UTF8() {
return RE_Options().set_utf8(true);
}
static inline RE_Options CASELESS() {
return RE_Options().set_caseless(true);
}
static inline RE_Options MULTILINE() {
return RE_Options().set_multiline(true);
}
static inline RE_Options DOTALL() {
return RE_Options().set_dotall(true);
}
static inline RE_Options EXTENDED() {
return RE_Options().set_extended(true);
}
// Interface for regular expression matching. Also corresponds to a
// pre-compiled regular expression. An "RE" object is safe for
// concurrent use by multiple threads.
class PCRECPP_EXP_DEFN RE {
public:
// We provide implicit conversions from strings so that users can
// pass in a string or a "const char*" wherever an "RE" is expected.
RE(const string& pat) { Init(pat, NULL); }
RE(const string& pat, const RE_Options& option) { Init(pat, &option); }
RE(const char* pat) { Init(pat, NULL); }
RE(const char* pat, const RE_Options& option) { Init(pat, &option); }
RE(const unsigned char* pat) {
Init(reinterpret_cast<const char*>(pat), NULL);
}
RE(const unsigned char* pat, const RE_Options& option) {
Init(reinterpret_cast<const char*>(pat), &option);
}
// Copy constructor & assignment - note that these are expensive
// because they recompile the expression.
RE(const RE& re) { Init(re.pattern_, &re.options_); }
const RE& operator=(const RE& re) {
if (this != &re) {
Cleanup();
// This is the code that originally came from Google
// Init(re.pattern_.c_str(), &re.options_);
// This is the replacement from Ari Pollak
Init(re.pattern_, &re.options_);
}
return *this;
}
~RE();
// The string specification for this RE. E.g.
// RE re("ab*c?d+");
// re.pattern(); // "ab*c?d+"
const string& pattern() const { return pattern_; }
// If RE could not be created properly, returns an error string.
// Else returns the empty string.
const string& error() const { return *error_; }
/***** The useful part: the matching interface *****/
// This is provided so one can do pattern.ReplaceAll() just as
// easily as ReplaceAll(pattern-text, ....)
bool FullMatch(const StringPiece& text,
const Arg& ptr1 = no_arg,
const Arg& ptr2 = no_arg,
const Arg& ptr3 = no_arg,
const Arg& ptr4 = no_arg,
const Arg& ptr5 = no_arg,
const Arg& ptr6 = no_arg,
const Arg& ptr7 = no_arg,
const Arg& ptr8 = no_arg,
const Arg& ptr9 = no_arg,
const Arg& ptr10 = no_arg,
const Arg& ptr11 = no_arg,
const Arg& ptr12 = no_arg,
const Arg& ptr13 = no_arg,
const Arg& ptr14 = no_arg,
const Arg& ptr15 = no_arg,
const Arg& ptr16 = no_arg) const;
bool PartialMatch(const StringPiece& text,
const Arg& ptr1 = no_arg,
const Arg& ptr2 = no_arg,
const Arg& ptr3 = no_arg,
const Arg& ptr4 = no_arg,
const Arg& ptr5 = no_arg,
const Arg& ptr6 = no_arg,
const Arg& ptr7 = no_arg,
const Arg& ptr8 = no_arg,
const Arg& ptr9 = no_arg,
const Arg& ptr10 = no_arg,
const Arg& ptr11 = no_arg,
const Arg& ptr12 = no_arg,
const Arg& ptr13 = no_arg,
const Arg& ptr14 = no_arg,
const Arg& ptr15 = no_arg,
const Arg& ptr16 = no_arg) const;
bool Consume(StringPiece* input,
const Arg& ptr1 = no_arg,
const Arg& ptr2 = no_arg,
const Arg& ptr3 = no_arg,
const Arg& ptr4 = no_arg,
const Arg& ptr5 = no_arg,
const Arg& ptr6 = no_arg,
const Arg& ptr7 = no_arg,
const Arg& ptr8 = no_arg,
const Arg& ptr9 = no_arg,
const Arg& ptr10 = no_arg,
const Arg& ptr11 = no_arg,
const Arg& ptr12 = no_arg,
const Arg& ptr13 = no_arg,
const Arg& ptr14 = no_arg,
const Arg& ptr15 = no_arg,
const Arg& ptr16 = no_arg) const;
bool FindAndConsume(StringPiece* input,
const Arg& ptr1 = no_arg,
const Arg& ptr2 = no_arg,
const Arg& ptr3 = no_arg,
const Arg& ptr4 = no_arg,
const Arg& ptr5 = no_arg,
const Arg& ptr6 = no_arg,
const Arg& ptr7 = no_arg,
const Arg& ptr8 = no_arg,
const Arg& ptr9 = no_arg,
const Arg& ptr10 = no_arg,
const Arg& ptr11 = no_arg,
const Arg& ptr12 = no_arg,
const Arg& ptr13 = no_arg,
const Arg& ptr14 = no_arg,
const Arg& ptr15 = no_arg,
const Arg& ptr16 = no_arg) const;
bool Replace(const StringPiece& rewrite,
string *str) const;
int GlobalReplace(const StringPiece& rewrite,
string *str) const;
bool Extract(const StringPiece &rewrite,
const StringPiece &text,
string *out) const;
// Escapes all potentially meaningful regexp characters in
// 'unquoted'. The returned string, used as a regular expression,
// will exactly match the original string. For example,
// 1.5-2.0?
// may become:
// 1\.5\-2\.0\?
// Note QuoteMeta behaves the same as perl's QuoteMeta function,
// *except* that it escapes the NUL character (\0) as backslash + 0,
// rather than backslash + NUL.
static string QuoteMeta(const StringPiece& unquoted);
/***** Generic matching interface *****/
// Type of match (TODO: Should be restructured as part of RE_Options)
enum Anchor {
UNANCHORED, // No anchoring
ANCHOR_START, // Anchor at start only
ANCHOR_BOTH // Anchor at start and end
};
// General matching routine. Stores the length of the match in
// "*consumed" if successful.
bool DoMatch(const StringPiece& text,
Anchor anchor,
int* consumed,
const Arg* const* args, int n) const;
// Return the number of capturing subpatterns, or -1 if the
// regexp wasn't valid on construction.
int NumberOfCapturingGroups() const;
// The default value for an argument, to indicate the end of the argument
// list. This must be used only in optional argument defaults. It should NOT
// be passed explicitly. Some people have tried to use it like this:
//
// FullMatch(x, y, &z, no_arg, &w);
//
// This is a mistake, and will not work.
static Arg no_arg;
private:
void Init(const string& pattern, const RE_Options* options);
void Cleanup();
// Match against "text", filling in "vec" (up to "vecsize" * 2/3) with
// pairs of integers for the beginning and end positions of matched
// text. The first pair corresponds to the entire matched text;
// subsequent pairs correspond, in order, to parentheses-captured
// matches. Returns the number of pairs (one more than the number of
// the last subpattern with a match) if matching was successful
// and zero if the match failed.
// I.e. for RE("(foo)|(bar)|(baz)") it will return 2, 3, and 4 when matching
// against "foo", "bar", and "baz" respectively.
// When matching RE("(foo)|hello") against "hello", it will return 1.
// But the values for all subpattern are filled in into "vec".
int TryMatch(const StringPiece& text,
int startpos,
Anchor anchor,
bool empty_ok,
int *vec,
int vecsize) const;
// Append the "rewrite" string, with backslash subsitutions from "text"
// and "vec", to string "out".
bool Rewrite(string *out,
const StringPiece& rewrite,
const StringPiece& text,
int *vec,
int veclen) const;
// internal implementation for DoMatch
bool DoMatchImpl(const StringPiece& text,
Anchor anchor,
int* consumed,
const Arg* const args[],
int n,
int* vec,
int vecsize) const;
// Compile the regexp for the specified anchoring mode
pcre* Compile(Anchor anchor);
string pattern_;
RE_Options options_;
pcre* re_full_; // For full matches
pcre* re_partial_; // For partial matches
const string* error_; // Error indicator (or points to empty string)
};
} // namespace pcrecpp
#endif /* _PCRECPP_H */
usr/include/pcre_stringpiece.h 0000644 00000014110 15040356141 0012472 0 ustar 00 // Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Sanjay Ghemawat
//
// A string like object that points into another piece of memory.
// Useful for providing an interface that allows clients to easily
// pass in either a "const char*" or a "string".
//
// Arghh! I wish C++ literals were automatically of type "string".
#ifndef _PCRE_STRINGPIECE_H
#define _PCRE_STRINGPIECE_H
#include <string.h>
#include <string>
#include <iosfwd> // for ostream forward-declaration
#if 0
#define HAVE_TYPE_TRAITS
#include <type_traits.h>
#elif 0
#define HAVE_TYPE_TRAITS
#include <bits/type_traits.h>
#endif
#include <pcre.h>
using std::string;
namespace pcrecpp {
class PCRECPP_EXP_DEFN StringPiece {
private:
const char* ptr_;
int length_;
public:
// We provide non-explicit singleton constructors so users can pass
// in a "const char*" or a "string" wherever a "StringPiece" is
// expected.
StringPiece()
: ptr_(NULL), length_(0) { }
StringPiece(const char* str)
: ptr_(str), length_(static_cast<int>(strlen(ptr_))) { }
StringPiece(const unsigned char* str)
: ptr_(reinterpret_cast<const char*>(str)),
length_(static_cast<int>(strlen(ptr_))) { }
StringPiece(const string& str)
: ptr_(str.data()), length_(static_cast<int>(str.size())) { }
StringPiece(const char* offset, int len)
: ptr_(offset), length_(len) { }
// data() may return a pointer to a buffer with embedded NULs, and the
// returned buffer may or may not be null terminated. Therefore it is
// typically a mistake to pass data() to a routine that expects a NUL
// terminated string. Use "as_string().c_str()" if you really need to do
// this. Or better yet, change your routine so it does not rely on NUL
// termination.
const char* data() const { return ptr_; }
int size() const { return length_; }
bool empty() const { return length_ == 0; }
void clear() { ptr_ = NULL; length_ = 0; }
void set(const char* buffer, int len) { ptr_ = buffer; length_ = len; }
void set(const char* str) {
ptr_ = str;
length_ = static_cast<int>(strlen(str));
}
void set(const void* buffer, int len) {
ptr_ = reinterpret_cast<const char*>(buffer);
length_ = len;
}
char operator[](int i) const { return ptr_[i]; }
void remove_prefix(int n) {
ptr_ += n;
length_ -= n;
}
void remove_suffix(int n) {
length_ -= n;
}
bool operator==(const StringPiece& x) const {
return ((length_ == x.length_) &&
(memcmp(ptr_, x.ptr_, length_) == 0));
}
bool operator!=(const StringPiece& x) const {
return !(*this == x);
}
#define STRINGPIECE_BINARY_PREDICATE(cmp,auxcmp) \
bool operator cmp (const StringPiece& x) const { \
int r = memcmp(ptr_, x.ptr_, length_ < x.length_ ? length_ : x.length_); \
return ((r auxcmp 0) || ((r == 0) && (length_ cmp x.length_))); \
}
STRINGPIECE_BINARY_PREDICATE(<, <);
STRINGPIECE_BINARY_PREDICATE(<=, <);
STRINGPIECE_BINARY_PREDICATE(>=, >);
STRINGPIECE_BINARY_PREDICATE(>, >);
#undef STRINGPIECE_BINARY_PREDICATE
int compare(const StringPiece& x) const {
int r = memcmp(ptr_, x.ptr_, length_ < x.length_ ? length_ : x.length_);
if (r == 0) {
if (length_ < x.length_) r = -1;
else if (length_ > x.length_) r = +1;
}
return r;
}
string as_string() const {
return string(data(), size());
}
void CopyToString(string* target) const {
target->assign(ptr_, length_);
}
// Does "this" start with "x"
bool starts_with(const StringPiece& x) const {
return ((length_ >= x.length_) && (memcmp(ptr_, x.ptr_, x.length_) == 0));
}
};
} // namespace pcrecpp
// ------------------------------------------------------------------
// Functions used to create STL containers that use StringPiece
// Remember that a StringPiece's lifetime had better be less than
// that of the underlying string or char*. If it is not, then you
// cannot safely store a StringPiece into an STL container
// ------------------------------------------------------------------
#ifdef HAVE_TYPE_TRAITS
// This makes vector<StringPiece> really fast for some STL implementations
template<> struct __type_traits<pcrecpp::StringPiece> {
typedef __true_type has_trivial_default_constructor;
typedef __true_type has_trivial_copy_constructor;
typedef __true_type has_trivial_assignment_operator;
typedef __true_type has_trivial_destructor;
typedef __true_type is_POD_type;
};
#endif
// allow StringPiece to be logged
std::ostream& operator<<(std::ostream& o, const pcrecpp::StringPiece& piece);
#endif /* _PCRE_STRINGPIECE_H */
usr/include/pcre.h 0000644 00000031112 15040356142 0010100 0 ustar 00 /*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
/* This is the public header file for the PCRE library, to be #included by
applications that call the PCRE functions.
Copyright (c) 1997-2009 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the University of Cambridge nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifndef _PCRE_H
#define _PCRE_H
/* The current PCRE version information. */
#define PCRE_MAJOR 8
#define PCRE_MINOR 02
#define PCRE_PRERELEASE
#define PCRE_DATE 2010-03-19
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
export setting is defined in pcre_internal.h, which includes this file. So we
don't change existing definitions of PCRE_EXP_DECL and PCRECPP_EXP_DECL. */
#if defined(_WIN32) && !defined(PCRE_STATIC)
# ifndef PCRE_EXP_DECL
# define PCRE_EXP_DECL extern __declspec(dllimport)
# endif
# ifdef __cplusplus
# ifndef PCRECPP_EXP_DECL
# define PCRECPP_EXP_DECL extern __declspec(dllimport)
# endif
# ifndef PCRECPP_EXP_DEFN
# define PCRECPP_EXP_DEFN __declspec(dllimport)
# endif
# endif
#endif
/* By default, we use the standard "extern" declarations. */
#ifndef PCRE_EXP_DECL
# ifdef __cplusplus
# define PCRE_EXP_DECL extern "C"
# else
# define PCRE_EXP_DECL extern
# endif
#endif
#ifdef __cplusplus
# ifndef PCRECPP_EXP_DECL
# define PCRECPP_EXP_DECL extern
# endif
# ifndef PCRECPP_EXP_DEFN
# define PCRECPP_EXP_DEFN
# endif
#endif
/* Have to include stdlib.h in order to ensure that size_t is defined;
it is needed here for malloc. */
#include <stdlib.h>
/* Allow for C++ users */
#ifdef __cplusplus
extern "C" {
#endif
/* Options. Some are compile-time only, some are run-time only, and some are
both, so we keep them all distinct. */
#define PCRE_CASELESS 0x00000001
#define PCRE_MULTILINE 0x00000002
#define PCRE_DOTALL 0x00000004
#define PCRE_EXTENDED 0x00000008
#define PCRE_ANCHORED 0x00000010
#define PCRE_DOLLAR_ENDONLY 0x00000020
#define PCRE_EXTRA 0x00000040
#define PCRE_NOTBOL 0x00000080
#define PCRE_NOTEOL 0x00000100
#define PCRE_UNGREEDY 0x00000200
#define PCRE_NOTEMPTY 0x00000400
#define PCRE_UTF8 0x00000800
#define PCRE_NO_AUTO_CAPTURE 0x00001000
#define PCRE_NO_UTF8_CHECK 0x00002000
#define PCRE_AUTO_CALLOUT 0x00004000
#define PCRE_PARTIAL_SOFT 0x00008000
#define PCRE_PARTIAL 0x00008000 /* Backwards compatible synonym */
#define PCRE_DFA_SHORTEST 0x00010000
#define PCRE_DFA_RESTART 0x00020000
#define PCRE_FIRSTLINE 0x00040000
#define PCRE_DUPNAMES 0x00080000
#define PCRE_NEWLINE_CR 0x00100000
#define PCRE_NEWLINE_LF 0x00200000
#define PCRE_NEWLINE_CRLF 0x00300000
#define PCRE_NEWLINE_ANY 0x00400000
#define PCRE_NEWLINE_ANYCRLF 0x00500000
#define PCRE_BSR_ANYCRLF 0x00800000
#define PCRE_BSR_UNICODE 0x01000000
#define PCRE_JAVASCRIPT_COMPAT 0x02000000
#define PCRE_NO_START_OPTIMIZE 0x04000000
#define PCRE_NO_START_OPTIMISE 0x04000000
#define PCRE_PARTIAL_HARD 0x08000000
#define PCRE_NOTEMPTY_ATSTART 0x10000000
/* Exec-time and get/set-time error codes */
#define PCRE_ERROR_NOMATCH (-1)
#define PCRE_ERROR_NULL (-2)
#define PCRE_ERROR_BADOPTION (-3)
#define PCRE_ERROR_BADMAGIC (-4)
#define PCRE_ERROR_UNKNOWN_OPCODE (-5)
#define PCRE_ERROR_UNKNOWN_NODE (-5) /* For backward compatibility */
#define PCRE_ERROR_NOMEMORY (-6)
#define PCRE_ERROR_NOSUBSTRING (-7)
#define PCRE_ERROR_MATCHLIMIT (-8)
#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */
#define PCRE_ERROR_BADUTF8 (-10)
#define PCRE_ERROR_BADUTF8_OFFSET (-11)
#define PCRE_ERROR_PARTIAL (-12)
#define PCRE_ERROR_BADPARTIAL (-13)
#define PCRE_ERROR_INTERNAL (-14)
#define PCRE_ERROR_BADCOUNT (-15)
#define PCRE_ERROR_DFA_UITEM (-16)
#define PCRE_ERROR_DFA_UCOND (-17)
#define PCRE_ERROR_DFA_UMLIMIT (-18)
#define PCRE_ERROR_DFA_WSSIZE (-19)
#define PCRE_ERROR_DFA_RECURSE (-20)
#define PCRE_ERROR_RECURSIONLIMIT (-21)
#define PCRE_ERROR_NULLWSLIMIT (-22) /* No longer actually used */
#define PCRE_ERROR_BADNEWLINE (-23)
/* Request types for pcre_fullinfo() */
#define PCRE_INFO_OPTIONS 0
#define PCRE_INFO_SIZE 1
#define PCRE_INFO_CAPTURECOUNT 2
#define PCRE_INFO_BACKREFMAX 3
#define PCRE_INFO_FIRSTBYTE 4
#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */
#define PCRE_INFO_FIRSTTABLE 5
#define PCRE_INFO_LASTLITERAL 6
#define PCRE_INFO_NAMEENTRYSIZE 7
#define PCRE_INFO_NAMECOUNT 8
#define PCRE_INFO_NAMETABLE 9
#define PCRE_INFO_STUDYSIZE 10
#define PCRE_INFO_DEFAULT_TABLES 11
#define PCRE_INFO_OKPARTIAL 12
#define PCRE_INFO_JCHANGED 13
#define PCRE_INFO_HASCRORLF 14
#define PCRE_INFO_MINLENGTH 15
/* Request types for pcre_config(). Do not re-arrange, in order to remain
compatible. */
#define PCRE_CONFIG_UTF8 0
#define PCRE_CONFIG_NEWLINE 1
#define PCRE_CONFIG_LINK_SIZE 2
#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3
#define PCRE_CONFIG_MATCH_LIMIT 4
#define PCRE_CONFIG_STACKRECURSE 5
#define PCRE_CONFIG_UNICODE_PROPERTIES 6
#define PCRE_CONFIG_MATCH_LIMIT_RECURSION 7
#define PCRE_CONFIG_BSR 8
/* Bit flags for the pcre_extra structure. Do not re-arrange or redefine
these bits, just add new ones on the end, in order to remain compatible. */
#define PCRE_EXTRA_STUDY_DATA 0x0001
#define PCRE_EXTRA_MATCH_LIMIT 0x0002
#define PCRE_EXTRA_CALLOUT_DATA 0x0004
#define PCRE_EXTRA_TABLES 0x0008
#define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0x0010
/* Types */
struct real_pcre; /* declaration; the definition is private */
typedef struct real_pcre pcre;
/* When PCRE is compiled as a C++ library, the subject pointer type can be
replaced with a custom type. For conventional use, the public interface is a
const char *. */
#ifndef PCRE_SPTR
#define PCRE_SPTR const char *
#endif
/* The structure for passing additional data to pcre_exec(). This is defined in
such as way as to be extensible. Always add new fields at the end, in order to
remain compatible. */
typedef struct pcre_extra {
unsigned long int flags; /* Bits for which fields are set */
void *study_data; /* Opaque data from pcre_study() */
unsigned long int match_limit; /* Maximum number of calls to match() */
void *callout_data; /* Data passed back in callouts */
const unsigned char *tables; /* Pointer to character tables */
unsigned long int match_limit_recursion; /* Max recursive calls to match() */
} pcre_extra;
/* The structure for passing out data via the pcre_callout_function. We use a
structure so that new fields can be added on the end in future versions,
without changing the API of the function, thereby allowing old clients to work
without modification. */
typedef struct pcre_callout_block {
int version; /* Identifies version of block */
/* ------------------------ Version 0 ------------------------------- */
int callout_number; /* Number compiled into pattern */
int *offset_vector; /* The offset vector */
PCRE_SPTR subject; /* The subject being matched */
int subject_length; /* The length of the subject */
int start_match; /* Offset to start of this match attempt */
int current_position; /* Where we currently are in the subject */
int capture_top; /* Max current capture */
int capture_last; /* Most recently closed capture */
void *callout_data; /* Data passed in with the call */
/* ------------------- Added for Version 1 -------------------------- */
int pattern_position; /* Offset to next item in the pattern */
int next_item_length; /* Length of next item in the pattern */
/* ------------------------------------------------------------------ */
} pcre_callout_block;
/* Indirection for store get and free functions. These can be set to
alternative malloc/free functions if required. Special ones are used in the
non-recursive case for "frames". There is also an optional callout function
that is triggered by the (?) regex item. For Virtual Pascal, these definitions
have to take another form. */
#ifndef VPCOMPAT
PCRE_EXP_DECL void *(*pcre_malloc)(size_t);
PCRE_EXP_DECL void (*pcre_free)(void *);
PCRE_EXP_DECL void *(*pcre_stack_malloc)(size_t);
PCRE_EXP_DECL void (*pcre_stack_free)(void *);
PCRE_EXP_DECL int (*pcre_callout)(pcre_callout_block *);
#else /* VPCOMPAT */
PCRE_EXP_DECL void *pcre_malloc(size_t);
PCRE_EXP_DECL void pcre_free(void *);
PCRE_EXP_DECL void *pcre_stack_malloc(size_t);
PCRE_EXP_DECL void pcre_stack_free(void *);
PCRE_EXP_DECL int pcre_callout(pcre_callout_block *);
#endif /* VPCOMPAT */
/* Exported PCRE functions */
PCRE_EXP_DECL pcre *pcre_compile(const char *, int, const char **, int *,
const unsigned char *);
PCRE_EXP_DECL pcre *pcre_compile2(const char *, int, int *, const char **,
int *, const unsigned char *);
PCRE_EXP_DECL int pcre_config(int, void *);
PCRE_EXP_DECL int pcre_copy_named_substring(const pcre *, const char *,
int *, int, const char *, char *, int);
PCRE_EXP_DECL int pcre_copy_substring(const char *, int *, int, int, char *,
int);
PCRE_EXP_DECL int pcre_dfa_exec(const pcre *, const pcre_extra *,
const char *, int, int, int, int *, int , int *, int);
PCRE_EXP_DECL int pcre_exec(const pcre *, const pcre_extra *, PCRE_SPTR,
int, int, int, int *, int);
PCRE_EXP_DECL void pcre_free_substring(const char *);
PCRE_EXP_DECL void pcre_free_substring_list(const char **);
PCRE_EXP_DECL int pcre_fullinfo(const pcre *, const pcre_extra *, int,
void *);
PCRE_EXP_DECL int pcre_get_named_substring(const pcre *, const char *,
int *, int, const char *, const char **);
PCRE_EXP_DECL int pcre_get_stringnumber(const pcre *, const char *);
PCRE_EXP_DECL int pcre_get_stringtable_entries(const pcre *, const char *,
char **, char **);
PCRE_EXP_DECL int pcre_get_substring(const char *, int *, int, int,
const char **);
PCRE_EXP_DECL int pcre_get_substring_list(const char *, int *, int,
const char ***);
PCRE_EXP_DECL int pcre_info(const pcre *, int *, int *);
PCRE_EXP_DECL const unsigned char *pcre_maketables(void);
PCRE_EXP_DECL int pcre_refcount(pcre *, int);
PCRE_EXP_DECL pcre_extra *pcre_study(const pcre *, int, const char **);
PCRE_EXP_DECL const char *pcre_version(void);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* End of pcre.h */
usr/include/pcre_scanner.h 0000644 00000014710 15040356142 0011616 0 ustar 00 // Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Author: Sanjay Ghemawat
//
// Regular-expression based scanner for parsing an input stream.
//
// Example 1: parse a sequence of "var = number" entries from input:
//
// Scanner scanner(input);
// string var;
// int number;
// scanner.SetSkipExpression("\\s+"); // Skip any white space we encounter
// while (scanner.Consume("(\\w+) = (\\d+)", &var, &number)) {
// ...;
// }
#ifndef _PCRE_SCANNER_H
#define _PCRE_SCANNER_H
#include <assert.h>
#include <string>
#include <vector>
#include <pcrecpp.h>
#include <pcre_stringpiece.h>
namespace pcrecpp {
class PCRECPP_EXP_DEFN Scanner {
public:
Scanner();
explicit Scanner(const std::string& input);
~Scanner();
// Return current line number. The returned line-number is
// one-based. I.e. it returns 1 + the number of consumed newlines.
//
// Note: this method may be slow. It may take time proportional to
// the size of the input.
int LineNumber() const;
// Return the byte-offset that the scanner is looking in the
// input data;
int Offset() const;
// Return true iff the start of the remaining input matches "re"
bool LookingAt(const RE& re) const;
// Return true iff all of the following are true
// a. the start of the remaining input matches "re",
// b. if any arguments are supplied, matched sub-patterns can be
// parsed and stored into the arguments.
// If it returns true, it skips over the matched input and any
// following input that matches the "skip" regular expression.
bool Consume(const RE& re,
const Arg& arg0 = RE::no_arg,
const Arg& arg1 = RE::no_arg,
const Arg& arg2 = RE::no_arg
// TODO: Allow more arguments?
);
// Set the "skip" regular expression. If after consuming some data,
// a prefix of the input matches this RE, it is automatically
// skipped. For example, a programming language scanner would use
// a skip RE that matches white space and comments.
//
// scanner.SetSkipExpression("\\s+|//.*|/[*](.|\n)*?[*]/");
//
// Skipping repeats as long as it succeeds. We used to let people do
// this by writing "(...)*" in the regular expression, but that added
// up to lots of recursive calls within the pcre library, so now we
// control repetition explicitly via the function call API.
//
// You can pass NULL for "re" if you do not want any data to be skipped.
void Skip(const char* re); // DEPRECATED; does *not* repeat
void SetSkipExpression(const char* re);
// Temporarily pause "skip"ing. This
// Skip("Foo"); code ; DisableSkip(); code; EnableSkip()
// is similar to
// Skip("Foo"); code ; Skip(NULL); code ; Skip("Foo");
// but avoids creating/deleting new RE objects.
void DisableSkip();
// Reenable previously paused skipping. Any prefix of the input
// that matches the skip pattern is immediately dropped.
void EnableSkip();
/***** Special wrappers around SetSkip() for some common idioms *****/
// Arranges to skip whitespace, C comments, C++ comments.
// The overall RE is a disjunction of the following REs:
// \\s whitespace
// //.*\n C++ comment
// /[*](.|\n)*?[*]/ C comment (x*? means minimal repetitions of x)
// We get repetition via the semantics of SetSkipExpression, not by using *
void SkipCXXComments() {
SetSkipExpression("\\s|//.*\n|/[*](?:\n|.)*?[*]/");
}
void set_save_comments(bool comments) {
save_comments_ = comments;
}
bool save_comments() {
return save_comments_;
}
// Append to vector ranges the comments found in the
// byte range [start,end] (inclusive) of the input data.
// Only comments that were extracted entirely within that
// range are returned: no range splitting of atomically-extracted
// comments is performed.
void GetComments(int start, int end, std::vector<StringPiece> *ranges);
// Append to vector ranges the comments added
// since the last time this was called. This
// functionality is provided for efficiency when
// interleaving scanning with parsing.
void GetNextComments(std::vector<StringPiece> *ranges);
private:
std::string data_; // All the input data
StringPiece input_; // Unprocessed input
RE* skip_; // If non-NULL, RE for skipping input
bool should_skip_; // If true, use skip_
bool skip_repeat_; // If true, repeat skip_ as long as it works
bool save_comments_; // If true, aggregate the skip expression
// the skipped comments
// TODO: later consider requiring that the StringPieces be added
// in order by their start position
std::vector<StringPiece> *comments_;
// the offset into comments_ that has been returned by GetNextComments
int comments_offset_;
// helper function to consume *skip_ and honour
// save_comments_
void ConsumeSkip();
};
} // namespace pcrecpp
#endif /* _PCRE_SCANNER_H */
usr/bin/pcregrep 0000755 00000114770 15040356142 0007674 0 ustar 00 ELF >