Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Fri, 21 Apr 2023 17:48:28 +0200
From: наб <nabijaczleweli@...ijaczleweli.xyz>
To: musl@...ts.openwall.com
Subject: REG_STARTEND tests

I didn't formalise the tests last night, but see the attached file,
which passes cleanly on NetBSD and the illumos gate.

On musl with patch 1/2 applied I get
$ ~/store/code/musl/prefix/bin/musl-clang tst-reg-startend.c -o \
    /tmp/tst-reg-startend  -DSTANDALONE  -static &&
	/tmp/tst-reg-startend 2>&1 | cat -A
tst-reg-startend.c: ^a.c$: ac: no match
tst-reg-startend.c: ^a.*c$: ac: no match
tst-reg-startend.c: ^a[^c]c$: ac: no match
tst-reg-startend.c: ^a..: ac: no match
tst-reg-startend.c: ..c: ac: no match
tst-reg-startend.c: [^z]c: ac: no match
tst-reg-startend.c: [^z]c: ac: wanted {2, 4}, got {1, 4}

And with patch 2/2 applied as well it passes cleanly.


glibc gives me a host of errors, but I'll post fixes and include this
test there later.

Best,
наб

Please keep me in CC.

/* Permission to use, copy, modify, and/or distribute this software for any
   purpose with or without fee is hereby granted.

   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  */

#include <assert.h>
#include <locale.h>
#include <string.h>
#include <regex.h>
#include <stdio.h>
#include <stdbool.h>


/* Brace-enclosed initializer for a regmatch_t covering bytes [s, e).
   This is a plain initializer list rather than a compound literal: every
   use below initializes an object with static storage duration, where
   ISO C requires constant expressions and a compound literal is accepted
   only as a compiler extension.  */
#define M(s, e) {.rm_so = (s), .rm_eo = (e)}
/* Nonzero iff two regmatch_t values have equal start and end offsets.  */
#define MEQ(l, r) ((l).rm_so == (r).rm_so && (l).rm_eo == (r).rm_eo)

/* Window handed to regexec () through pmatch[0] with REG_STARTEND:
   bytes 1..3 of each subject string.  */
static const regmatch_t bound = M(1, 4);

/* NULL-terminated pattern lists; results_ac[k] / results_aa[k] hold the
   offsets expected for regex_ac[k] / regex_aa[k].  */
static const char *const regex_ac[] =
  {"^a", "c$", "^a.c$", "^a.*c$", "^a[^c]c$", "^a..", "..c", "[^z]c", NULL};
static const char *const regex_aa[] =
  {"^a", "a$", "^\\(a\\).\\1$", "^a[^a]*", NULL};

/* Two subjects per group.  Inside the window the first string holds an
   embedded NUL at index 2 where the second holds 'b'; the same expected
   offsets are applied to both.  */
static const char *const data_ac[] = {"_a\0cdef", "_abcdef"};
static const char *const data_aa[] = {"_a\0adef", "_abadef"};

static const regmatch_t results_ac[] =
  {M(1, 2), M(3, 4), M(1, 4), M(1, 4), M(1, 4), M(1, 4), M(1, 4), M(2, 4)};
static const regmatch_t results_aa[] =
  {M(1, 2), M(3, 4), M(1, 4), M(1, 3)};

/* Each pattern (the NULL terminator excluded) needs exactly one result.  */
static_assert(sizeof(regex_ac) / sizeof(*regex_ac) - 1 ==
              sizeof(results_ac) / sizeof(*results_ac),
              "regex_ac and results_ac differ in length");
static_assert(sizeof(regex_aa) / sizeof(*regex_aa) - 1 ==
              sizeof(results_aa) / sizeof(*results_aa),
              "regex_aa and results_aa differ in length");


/* Record a failure and start a diagnostic line on stderr of the form
   "<file>: <regex>: <subject bytes inside bound>".  It expands to a comma
   expression so that the caller can append the rest of the message with
   ", fprintf (...)".  The expansion reads `regexes' and `i' and assigns
   `err' in the scope where it is used.  */
#define BASEERR(data)                                 \
  err = true,                                         \
    fprintf (stderr, "%s: %s: ", __FILE__, *regexes), \
    fwrite ((data)[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stderr)

/* Compile every pattern in the NULL-terminated list REGEXES (flags 0) and
   run it with REG_STARTEND, windowed to `bound', against both strings in
   DATA.  RESULTS[k] is the match expected for REGEXES[k] on either string.

   Returns true if any pattern failed to compile, failed to match, or
   matched at unexpected offsets; each failure is reported on stderr.  */
static bool
testbunch (const char *const *regexes, const char *const data[static 2],
           const regmatch_t *results)
{
  bool err = false;

  for (; *regexes; ++regexes, ++results)
    {
      regex_t rgx;

      /* Keep the call out of assert (): with NDEBUG the argument of
         assert () is not evaluated and rgx would be used uninitialised.  */
      if (regcomp (&rgx, *regexes, 0) != 0)
        {
          fprintf (stderr, "%s: %s: regcomp failed\n", __FILE__, *regexes);
          err = true;
          continue;
        }

      for (size_t i = 0; i < 2; ++i)
        {
          /* With REG_STARTEND pmatch[0] is both input (the window) and
             output (the match).  */
          regmatch_t match = bound;

          if (regexec (&rgx, data[i], 1, &match, REG_STARTEND) != 0)
            {
              BASEERR (data), fputs (": no match\n", stderr);
              /* Nothing was matched, so there are no offsets to compare;
                 skip the check to avoid a second, misleading report.  */
              continue;
            }

          if (!MEQ (match, *results))
            BASEERR (data), fprintf (stderr,
                                     ": wanted {%d, %d}, got {%d, %d}\n",
                                     (int) results->rm_so,
                                     (int) results->rm_eo,
                                     (int) match.rm_so, (int) match.rm_eo);
        }

      regfree (&rgx);
    }

  return err;
}


/* Subjects for the multibyte check, with ać_exp[k] telling whether "ać"
   is expected to match ać_data[k] inside `bound'.  Assuming ć is encoded
   as two bytes (UTF-8 source), the window of the first string ends in the
   middle of ć, while the window of the second string is exactly "ać".  */
static const char *const ać_data[2] = {"_aaćdef", "_aćdef"};
static const bool ać_exp[] = {false, true};

/* Run the pattern "ać" with REG_STARTEND, windowed to `bound', against
   both ać_data strings and compare the outcome with ać_exp.  In either
   case the regmatch_t is afterwards expected to equal `bound'.

   Returns true if the pattern failed to compile or any check failed;
   failures are reported on stderr.  */
static bool
testać (void)
{
  bool err = false;
  regex_t rgx;
  /* A one-element array so that BASEERR, which reads `*regexes', works
     here as well.  */
  const char *const regexes[] = {"ać"};

  /* Keep the call out of assert (): with NDEBUG the argument of assert ()
     is not evaluated and rgx would be used uninitialised.  */
  if (regcomp (&rgx, *regexes, 0) != 0)
    {
      fprintf (stderr, "%s: %s: regcomp failed\n", __FILE__, *regexes);
      return true;
    }

  for (size_t i = 0; i < 2; ++i)
    {
      regmatch_t match = bound;
      /* regexec () returns 0 on a match and some nonzero code otherwise;
         reduce it to a bool instead of relying on REG_NOMATCH being 1.  */
      bool matched
        = regexec (&rgx, ać_data[i], 1, &match, REG_STARTEND) == 0;

      if (matched != ać_exp[i])
        BASEERR(ać_data), fprintf (stderr, ": %s match\n",
                                   ać_exp[i] ? "no" : "yes");

      if (!MEQ(match, bound))
        BASEERR(ać_data), fprintf (stderr, ": wanted {%d, %d}, got {%d, %d}\n",
                                   (int)bound.rm_so, (int)bound.rm_eo,
                                   (int)match.rm_so, (int)match.rm_eo);
    }

  regfree(&rgx);
  return err;
}


/* Test driver: switch to the C.UTF-8 locale and run every test group.
   ARGC and ARGV are unused.

   Returns nonzero if the locale could not be set or any group reported a
   failure, zero otherwise.  */
static int
do_test (int argc, char **argv)
{
  (void) argc, (void) argv;

  /* Call setlocale () outside of assert (): with NDEBUG the argument of
     assert () is not evaluated and the tests would run in the "C" locale.  */
  if (setlocale (LC_ALL, "C.UTF-8") == NULL)
    {
      fprintf (stderr, "%s: setlocale (LC_ALL, \"C.UTF-8\") failed\n",
               __FILE__);
      return 1;
    }

  /* Run all groups unconditionally so that one failing group does not
     hide the diagnostics of the ones after it.  */
  bool failed = testbunch (regex_ac, data_ac, results_ac);
  failed |= testbunch (regex_aa, data_aa, results_aa);
  failed |= testać ();

  return failed;
}


/* Entry point.  When STANDALONE evaluates to nonzero the file provides
   its own main () that simply forwards to do_test (); otherwise it pulls
   in ../test-skeleton.c (not visible here; presumably it supplies main ()
   for the surrounding test suite).  */
#if STANDALONE
int
main (int argc, char **argv)
{
  const int status = do_test (argc, argv);
  return status;
}
#else
# include "../test-skeleton.c"
#endif


Download attachment "signature.asc" of type "application/pgp-signature" (834 bytes)

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.