firedrakeproject / firedrake

Firedrake is an automated system for the portable solution of partial differential equations using the finite element method (FEM)
https://firedrakeproject.org
Other
518 stars 160 forks source link

Segfault/buffer overrun in complicated form #1269

Open pefarrell opened 6 years ago

pefarrell commented 6 years ago

I'm trying to solve a fairly complicated problem (Frank-Oseen, augmented with an extra real-valued variable to eliminate rotational orbits). Here is a simplified code:

import sys

from firedrake import *
from petsc4py import PETSc
from math import atan2

def mdiv(n):
    return n[0].dx(0) + n[1].dx(1)

def mcurl(n):
    return as_vector([n[2].dx(1), -n[2].dx(0), n[1].dx(0) - n[0].dx(1)])

mesh = UnitSquareMesh(10, 10)
(x, y) = SpatialCoordinate(mesh)

Vele = VectorElement("CG", mesh.ufl_cell(), 2, dim=3)
Qele = FiniteElement("DG", mesh.ufl_cell(), 0)
Rele = FiniteElement("R",  mesh.ufl_cell(), 0)
Zele = MixedElement([Vele, Qele, Rele])
Z = FunctionSpace(mesh, Zele)

V = FunctionSpace(mesh, Vele)
nstar = Function(V)

z = Function(Z)
w = TestFunction(Z)
(n, lmbda, c) = split(z)
(v, _    , d) = split(w)

K = 1

E = 0.5 * (
     K * inner(mdiv(n), mdiv(n))*dx
   + K * inner(dot(n, mcurl(n)), dot(n, mcurl(n)))*dx
   + K * inner(cross(n, mcurl(n)), cross(n, mcurl(n)))*dx
   )

def epsbar(n): # infinitesimal generator of Lie group of rotations
    return -y * n.dx(0) + x * n.dx(1)

L = E + inner(lmbda, dot(n, n) - 1)*dx
F = (
       derivative(L, z, w)
     + c * inner(epsbar(n), v)*dx
     + d * inner(epsbar(nstar), n - nstar)*dx
    )

r = sqrt(x*x + y*y)
bcs = [DirichletBC(Z.sub(0), (1, 0, 0), "on_boundary")]

class Guess(Expression):
    def eval(self, values, x):
        r = sqrt(x[0]**2 + x[1]**2)
        theta = atan2(x[1], x[0])

        values[0] = cos(theta)
        values[1] = sin(theta)
        values[2] = 0

    def value_shape(self):
        return (3,)

z.split()[0].interpolate(Guess())
nstar.interpolate(Guess())

sp = {
       "snes_max_it": 100,
       "snes_atol": 1.0e-9,
       "snes_rtol": 0.0,
       "snes_monitor": None,
       "snes_converged_reason": None,
       "snes_linesearch_type": "l2",
       "snes_linesearch_maxstep": 1.0,
       "snes_linesearch_damping": 1.0,
       "snes_linesearch_monitor": None,
       "ksp_type": "gmres",
       "pc_type": "fieldsplit",
       "pc_fieldsplit_dm_splits": False,
       "pc_fieldsplit_0_fields": "0,1",
       "pc_fieldsplit_1_fields": "2",
       "pc_fieldsplit_type": "schur",
       "pc_fieldsplit_schur_type": "full",
       "fieldsplit_0_ksp_type": "preonly",
       "fieldsplit_0_pc_type": "python",
       "fieldsplit_0_pc_python_type": "firedrake.AssembledPC",
       "assembled_fieldsplit_0_ksp_type": "preonly",
       "assembled_fieldsplit_0_pc_type": "cholesky",
       "assembled_fieldsplit_0_pc_factor_mat_solver_type": "mumps",
       "assembled_fieldsplit_0_mat_type": "preonly",
       "fieldsplit_1_ksp_type": "gmres",
       "fieldsplit_1_pc_type": "none",
       "mat_type": "matfree",
       "mat_mumps_icntl_24": 1,
       "mat_mumps_icntl_13": 1
       }

solve(F == 0, z, bcs)

When I run this, I get a segfault/double free or memory corruption error (it varies from run to run). Running with PYOP2_DEBUG=1 and electric fence activated, it segfaults on

Thread 1 "python" received signal SIGSEGV, Segmentation fault.
0x00007fffbb50e8ef in wrap_form02_cell_integral_otherwise (start=0, end=200, arg0_0=0x7fffbd84a6a8, arg0_0_map0_0=0x7fffbd814d40, arg1_0=0x7fffbe481870, arg1_0_map0_0=0x7fffbe4386a0, arg2_0=0x7fffc08526a8, 
    arg2_0_map0_0=0x7fffc0893d40, arg3_0=0x7fffc08406a8, arg3_0_map0_0=0x7fffc0893d40, arg4_0=0x7fffbd82e9c0, arg4_0_map0_0=0x7fffc08dbce0, arg5_0=0x7fffc0965ff8)
    at /home/pefarrell/local/firedrake/firedrake-dev-20180726/.cache/pyop2/1f4261e32a8bd43a4e4a4f359d63a647_p28040.c:160
160       *(arg0_0 + (arg0_0_map0_0[i * 6 + i_0])* 3) += buffer_arg0_0[i_0*3 + 0];
(gdb) bt
#0  0x00007fffbb50e8ef in wrap_form02_cell_integral_otherwise (start=0, end=200, arg0_0=0x7fffbd84a6a8, arg0_0_map0_0=0x7fffbd814d40, arg1_0=0x7fffbe481870, arg1_0_map0_0=0x7fffbe4386a0, arg2_0=0x7fffc08526a8, 
    arg2_0_map0_0=0x7fffc0893d40, arg3_0=0x7fffc08406a8, arg3_0_map0_0=0x7fffc0893d40, arg4_0=0x7fffbd82e9c0, arg4_0_map0_0=0x7fffc08dbce0, arg5_0=0x7fffc0965ff8)
    at /home/pefarrell/local/firedrake/firedrake-dev-20180726/.cache/pyop2/1f4261e32a8bd43a4e4a4f359d63a647_p28040.c:160
#1  0x00007fffeabe0e18 in ffi_call_unix64 () from /usr/lib/x86_64-linux-gnu/libffi.so.6
#2  0x00007fffeabe087a in ffi_call () from /usr/lib/x86_64-linux-gnu/libffi.so.6
#3  0x00007fffeadf496d in _ctypes_callproc () from /usr/lib/python3.6/lib-dynload/_ctypes.cpython-36m-x86_64-linux-gnu.so
#4  0x00007fffeadeb727 in ?? () from /usr/lib/python3.6/lib-dynload/_ctypes.cpython-36m-x86_64-linux-gnu.so
#5  0x000000000045969e in PyObject_Call ()
#6  0x0000000000552029 in _PyEval_EvalFrameDefault ()
#7  0x000000000054efc1 in ?? ()
#8  0x00000000005581e9 in _PyFunction_FastCallDict ()
#9  0x0000000000459c11 in _PyObject_Call_Prepend ()
#10 0x000000000045969e in PyObject_Call ()
#11 0x00000000004e050b in ?? ()
#12 0x000000000045969e in PyObject_Call ()
#13 0x0000000000552029 in _PyEval_EvalFrameDefault ()
#14 0x000000000054efc1 in ?? ()
#15 0x00000000005581e9 in _PyFunction_FastCallDict ()
#16 0x0000000000459c11 in _PyObject_Call_Prepend ()
#17 0x000000000045969e in PyObject_Call ()
#18 0x0000000000552029 in _PyEval_EvalFrameDefault ()
#19 0x000000000054e4c8 in ?? ()
#20 0x000000000054f4f6 in ?? ()
#21 0x0000000000553aaf in _PyEval_EvalFrameDefault ()
#22 0x000000000054e4c8 in ?? ()
#23 0x000000000054f4f6 in ?? ()
#24 0x0000000000553aaf in _PyEval_EvalFrameDefault ()
#25 0x000000000054e4c8 in ?? ()
#26 0x000000000054f4f6 in ?? ()
#27 0x0000000000553aaf in _PyEval_EvalFrameDefault ()
#28 0x000000000054e4c8 in ?? ()
#29 0x00000000005582c2 in _PyFunction_FastCallDict ()
#30 0x0000000000459c11 in _PyObject_Call_Prepend ()
#31 0x000000000045969e in PyObject_Call ()
#32 0x00000000004e050b in ?? ()
#33 0x0000000000459893 in _PyObject_FastCallDict ()
#34 0x000000000054f117 in ?? ()
#35 0x0000000000553aaf in _PyEval_EvalFrameDefault ()
#36 0x000000000054efc1 in ?? ()
#37 0x000000000054f24d in ?? ()
#38 0x0000000000553aaf in _PyEval_EvalFrameDefault ()
#39 0x000000000054efc1 in ?? ()
#40 0x000000000054ffee in PyEval_EvalCodeEx ()
#41 0x000000000048b86d in ?? ()
#42 0x00007ffff573e8ec in __Pyx_PyObject_Call (func=0x7fffc665d840, arg=0x7fffbca053b8, kw=0x7fffbd545b40) at src/petsc4py.PETSc.c:285075
#43 0x00007ffff58c8c4d in __pyx_f_8petsc4py_5PETSc_SNES_Jacobian (__pyx_v_snes=<optimised out>, __pyx_v_x=<optimised out>, __pyx_v_J=<optimised out>, __pyx_v_P=<optimised out>, __pyx_v_ctx=<optimised out>)
    at src/petsc4py.PETSc.c:34974
#44 0x00007ffff48d2c65 in SNESComputeJacobian (snes=snes@entry=0x7fffbd650af0, X=X@entry=0x7fffbd654a10, A=0x7fffbdc03510, B=0x7fffbdc03510)
    at /home/pefarrell/local/firedrake/firedrake-dev-20180726/src/petsc/src/snes/interface/snes.c:2517
#45 0x00007ffff4916f8b in SNESSolve_NEWTONLS (snes=0x7fffbd650af0) at /home/pefarrell/local/firedrake/firedrake-dev-20180726/src/petsc/src/snes/impls/ls/ls.c:222
#46 0x00007ffff48dcbdb in SNESSolve (snes=0x7fffbd650af0, b=0x0, x=<optimised out>) at /home/pefarrell/local/firedrake/firedrake-dev-20180726/src/petsc/src/snes/interface/snes.c:4350
#47 0x00007ffff57bccda in __pyx_pf_8petsc4py_5PETSc_4SNES_144solve (__pyx_v_self=0x7fffbdcd4308, __pyx_v_x=<optimised out>, __pyx_v_b=<optimised out>) at src/petsc4py.PETSc.c:181174
#48 __pyx_pw_8petsc4py_5PETSc_4SNES_145solve (__pyx_v_self=0x7fffbdcd4308, __pyx_args=<optimised out>, __pyx_kwds=<optimised out>) at src/petsc4py.PETSc.c:50051
---Type <return> to continue, or q <return> to quit---

The generated code looks like

#include <petsc.h>
#include <stdbool.h>
#include <math.h>
#include <inttypes.h>

#include <immintrin.h>

 static inline void form02_cell_integral_otherwise (double  A[18] , const double *restrict coords , const double *restrict w_0 , const double *restrict w_1 , const double *restrict w_2 , const double *restrict w_3 )
{
  static const double  t0[6][6]  = {{-0.074803807748196, 0.517632341987674, -0.0748038077481967, 0.299215230992787, 0.0335448115231482, 0.299215230992784}, 
  {-0.074803807748196, -0.0748038077481967, 0.517632341987674, 0.299215230992787, 0.299215230992784, 0.0335448115231484}, 
  {0.517632341987671, -0.0748038077481967, -0.0748038077481967, 0.0335448115231487, 0.299215230992787, 0.299215230992787}, 
  {-0.0482083778155119, -0.0847304930939779, -0.0482083778155119, 0.192833511262048, 0.795480226200906, 0.192833511262048}, 
  {-0.0482083778155119, -0.048208377815512, -0.0847304930939778, 0.192833511262048, 0.192833511262048, 0.795480226200906}, 
  {-0.0847304930939777, -0.048208377815512, -0.0482083778155119, 0.795480226200906, 0.192833511262048, 0.192833511262048}};
  double  t1  = ((double)(-1) * coords[1]);
  double  t2  = (t1 + coords[3]);
  double  t3  = ((double)(-1) * coords[0]);
  double  t4  = (t3 + coords[2]);
  double  t5  = (t1 + coords[5]);
  double  t6  = (t3 + coords[4]);
  double  t7  = ((t4 * t5) + ((double)(-1) * (t6 * t2)));
  double  t8  = ((double)(1) / t7);
  double  t9  = (((double)(-1) * t2) * t8);
  double  t10  = (t5 * t8);
  double  t11  = (t4 * t8);
  double  t12  = (((double)(-1) * t6) * t8);
  static const double  t13[6][4]  = {{0.09157621350977, 0.816847572980459, 0.091576213509771}, 
  {0.0915762135097701, 0.0915762135097711, 0.816847572980459}, 
  {0.816847572980458, 0.091576213509771, 0.091576213509771}, 
  {0.445948490915965, 0.10810301816807, 0.445948490915965}, 
  {0.445948490915965, 0.445948490915965, 0.10810301816807}, 
  {0.10810301816807, 0.445948490915965, 0.445948490915965}};
  double  t14  = fabs(t7);
  static const double  t15[6]  = {0.054975871827661, 0.054975871827661, 0.054975871827661, 0.111690794839005, 0.111690794839005, 0.111690794839005};
  static const double  t16[6][6]  = {{0.633695145960923, 2.26739029192184, 0.0, 0.366304854039083, -0.366304854039083, -2.90108543788276}, 
  {0.633695145960919, -0.633695145960917, 0.0, 3.26739029192183, -3.26739029192183, 0.0}, 
  {-2.26739029192183, -0.633695145960919, 0.0, 0.366304854039083, -0.366304854039083, 2.90108543788275}, 
  {-0.78379396366386, -0.567587927327721, 0.0, 1.78379396366386, -1.78379396366386, 1.35138189099158}, 
  {-0.783793963663859, 0.783793963663859, 0.0, 0.432412072672279, -0.432412072672279, 0.0}, 
  {0.567587927327721, 0.78379396366386, 0.0, 1.78379396366386, -1.78379396366386, -1.35138189099158}};
  static const double  t17[6][6]  = {{0.633695145960915, 0.0, -0.633695145960915, 3.26739029192182, 0.0, -3.26739029192184}, 
  {0.633695145960914, 0.0, 2.26739029192184, 0.366304854039067, -2.90108543788275, -0.366304854039083}, 
  {-2.26739029192184, 0.0, -0.633695145960916, 0.366304854039074, 2.90108543788276, -0.366304854039082}, 
  {-0.783793963663865, 0.0, 0.783793963663862, 0.432412072672268, 0.0, -0.43241207267228}, 
  {-0.783793963663864, 0.0, -0.567587927327719, 1.78379396366385, 1.35138189099159, -1.78379396366386}, 
  {0.567587927327715, 0.0, 0.783793963663862, 1.78379396366385, -1.35138189099157, -1.78379396366386}};

  for (int  ip  = 0; ip < 6; ip += 1)
  {
    double  t23  = 0.0;
    double  t22  = 0.0;
    double  t21  = 0.0;
    double  t20  = 0.0;
    double  t19  = 0.0;
    double  t18  = 0.0;

    for (int  i_0  = 0; i_0 < 6; i_0 += 1)
    {
      t18 += t17[ip][i_0] * w_1[i_0*3+0];
      t19 += t16[ip][i_0] * w_1[i_0*3+0];
      t20 += t17[ip][i_0] * w_1[i_0*3+1];
      t21 += t16[ip][i_0] * w_1[i_0*3+1];
      t22 += t17[ip][i_0] * w_1[i_0*3+2];
      t23 += t16[ip][i_0] * w_1[i_0*3+2];

    }
    double  t24  = (t15[ip] * t14);
    double  t25  = (((t13[ip][0] * coords[0]) + (t13[ip][1] * coords[2])) + (t13[ip][2] * coords[4]));
    double  t26  = ((double)(-1) * (((t13[ip][0] * coords[1]) + (t13[ip][1] * coords[3])) + (t13[ip][2] * coords[5])));
    double  t27  = (t24 * ((t25 * ((t23 * t12) + (t22 * t11))) + (((t23 * t10) + (t22 * t9)) * t26)));
    double  t28  = (t24 * ((t25 * ((t21 * t12) + (t20 * t11))) + (((t21 * t10) + (t20 * t9)) * t26)));
    double  t29  = (t24 * ((t25 * ((t19 * t12) + (t18 * t11))) + (((t19 * t10) + (t18 * t9)) * t26)));

    for (int  j0  = 0; j0 < 6; j0 += 1)
    {
      #pragma coffee expression
      A[j0*3+0] += t0[ip][j0] * t29;
      #pragma coffee expression
      A[j0*3+1] += t0[ip][j0] * t28;
      #pragma coffee expression
      A[j0*3+2] += t0[ip][j0] * t27;

    }

  }

}

struct MapMask {
    /* Row pointer */
    PetscSection section;
    /* Indices */
    const PetscInt *indices;
};
struct EntityMask {
    PetscSection section;
    const int64_t *bottom;
    const int64_t *top;
};
static PetscErrorCode apply_extruded_mask(PetscSection section,
                                          const PetscInt mask_indices[],
                                          const int64_t mask,
                                          const int facet_offset,
                                          const int nbits,
                                          const int value_offset,
                                          PetscInt map[])
{
    PetscErrorCode ierr;
    PetscInt dof, off;
    /* Shortcircuit for interior cells */
    if (!mask) return 0;
    for (int bit = 0; bit < nbits; bit++) {
        if (mask & (1L<<bit)) {
            ierr = PetscSectionGetDof(section, bit, &dof); CHKERRQ(ierr);
            ierr = PetscSectionGetOffset(section, bit, &off); CHKERRQ(ierr);
            for (int k = off; k < off + dof; k++) {
                map[mask_indices[k] + facet_offset] += value_offset;
            }
        }
    }
    return 0;
}
PetscErrorCode wrap_form02_cell_integral_otherwise(int start,
                      int end,
                      double *arg0_0, int32_t *arg0_0_map0_0, double *arg1_0, int32_t *arg1_0_map0_0, double *arg2_0, int32_t *arg2_0_map0_0, double *arg3_0, int32_t *arg3_0_map0_0, double *arg4_0, int32_t *arg4_0_map0_0, double *arg5_0
                      ) {
  PetscErrorCode ierr;
  for ( int n = start; n < end; n++ ) {
    int32_t i = (int32_t)n;
    double buffer_arg0_0[18] __attribute__((aligned(16))) = {0.0};
double buffer_arg1_0[6] __attribute__((aligned(16)));
double buffer_arg2_0[18] __attribute__((aligned(16)));
double buffer_arg3_0[18] __attribute__((aligned(16)));
double buffer_arg4_0[1] ;
    for (int i_0=0; i_0<3; ++i_0) {
buffer_arg1_0[i_0*2] = *(arg1_0 + (arg1_0_map0_0[i * 3 + i_0])* 2);
buffer_arg1_0[i_0*2 + 1] = *(arg1_0 + (arg1_0_map0_0[i * 3 + i_0])* 2 + 1);
};
for (int i_0=0; i_0<6; ++i_0) {
buffer_arg2_0[i_0*3] = *(arg2_0 + (arg2_0_map0_0[i * 6 + i_0])* 3);
buffer_arg2_0[i_0*3 + 1] = *(arg2_0 + (arg2_0_map0_0[i * 6 + i_0])* 3 + 1);
buffer_arg2_0[i_0*3 + 2] = *(arg2_0 + (arg2_0_map0_0[i * 6 + i_0])* 3 + 2);
};
for (int i_0=0; i_0<6; ++i_0) {
buffer_arg3_0[i_0*3] = *(arg3_0 + (arg3_0_map0_0[i * 6 + i_0])* 3);
buffer_arg3_0[i_0*3 + 1] = *(arg3_0 + (arg3_0_map0_0[i * 6 + i_0])* 3 + 1);
buffer_arg3_0[i_0*3 + 2] = *(arg3_0 + (arg3_0_map0_0[i * 6 + i_0])* 3 + 2);
};
for (int i_0=0; i_0<1; ++i_0) {
buffer_arg4_0[i_0*1] = *(arg4_0 + (arg4_0_map0_0[i * 1 + i_0])* 1);
}
    form02_cell_integral_otherwise(buffer_arg0_0, buffer_arg1_0, buffer_arg2_0, buffer_arg3_0, buffer_arg4_0, arg5_0);
    for (int i_0=0; i_0<6; ++i_0) {
      *(arg0_0 + (arg0_0_map0_0[i * 6 + i_0])* 3) += buffer_arg0_0[i_0*3 + 0];
*(arg0_0 + (arg0_0_map0_0[i * 6 + i_0])* 3 + 1) += buffer_arg0_0[i_0*3 + 1 + 0];
*(arg0_0 + (arg0_0_map0_0[i * 6 + i_0])* 3 + 2) += buffer_arg0_0[i_0*3 + 2 + 0];
    }
  }
  return 0;
}

with sizes

(gdb) print sizeof(arg0_0_map0_0)
$6 = 8
(gdb) print i
$7 = 30
(gdb) print i_0
$8 = 2
(gdb) print i * 6 + i_0
$9 = 182
(gdb) 

Is this user error, or a bug in the compiler?

wence- commented 6 years ago

Bug in Firedrake/PyOP2. We treat the off-diagonal 1xN and Nx1 blocks as matrices to the outside world. So we think they way to discard entries is to just put negative values in the Maps. But internally, we have just have an array (Dat) representing the dense block. We insert into that directly (through the provided maps). So negative entries lead to out of bounds memory access.

We could rework the PyOP2 dense stuff to use MatDense, and then things might work. Or we need to handle these strange matrices specially.

danshapero commented 4 years ago

I tried running the sample code and get a diverged linear solve rather than a segfault. Did #1662 fix this?

wence- commented 4 years ago

No, it didn't sadly.

danshapero commented 4 years ago

Woops, my mistake! I misunderstood the issue and thought the problem was only in assembly.