summary refs log tree commit diff
path: root/gnu/packages
diff options
context:
space:
mode:
author Ludovic Courtès <ludovic.courtes@inria.fr> 2019-11-28 18:17:14 +0100
committer Ludovic Courtès <ludo@gnu.org> 2019-11-28 18:43:54 +0100
commit d8f8adfebf2c4040b7c04ff5e158ec664b92c268 (patch)
tree 62e46a2fef29891d51de44f319d15098e365ff1c /gnu/packages
parent 478880c4a95730e1b024e13602de6094e6b70f2f (diff)
download guix-d8f8adfebf2c4040b7c04ff5e158ec664b92c268.tar.gz
gnu: psm: Disable memory statistics code.
* gnu/packages/patches/psm-disable-memory-stats.patch: New file.
* gnu/packages/linux.scm (psm)[source]: Use it.
* gnu/local.mk (dist_patch_DATA): Add it.
Diffstat (limited to 'gnu/packages')
-rw-r--r-- gnu/packages/linux.scm 3
-rw-r--r-- gnu/packages/patches/psm-disable-memory-stats.patch 62
2 files changed, 64 insertions, 1 deletion
diff --git a/gnu/packages/linux.scm b/gnu/packages/linux.scm
index 457f6579a8..a5564d9a90 100644
--- a/gnu/packages/linux.scm
+++ b/gnu/packages/linux.scm
@@ -5539,7 +5539,8 @@ libraries, which are often integrated directly into libfabric.")
        (patches (search-patches
                  "psm-arch.patch"     ; uname -p returns "unknown" on Debian 9
                  "psm-ldflags.patch"  ; build shared lib with LDFLAGS
-                 "psm-repro.patch"))))  ; reproducibility
+                 "psm-repro.patch"    ; reproducibility
+                 "psm-disable-memory-stats.patch"))))
     (build-system gnu-build-system)
     (outputs '("out" "debug"))
     (inputs `(("libuuid" ,util-linux)))
diff --git a/gnu/packages/patches/psm-disable-memory-stats.patch b/gnu/packages/patches/psm-disable-memory-stats.patch
new file mode 100644
index 0000000000..52cd88a4e1
--- /dev/null
+++ b/gnu/packages/patches/psm-disable-memory-stats.patch
@@ -0,0 +1,62 @@
+The memory statistics code leads to segfaults during initialization (on
+machines with InfiniPath networking):
+
+  (gdb) bt full
+  #0  ips_ptl_init (ep=0x1fc6af8, ptl=0x1fc6f88, ctl=0x1fc6d78) at ptl.c:224
+	  err = PSM_OK
+	  num_of_send_bufs = 1024
+	  num_of_send_desc = 4096
+	  imm_size = 128
+	  context = 0x1fc6b70
+	  user_info = 0x1fc6b90
+	  enable_shcontexts = 0
+	  current_count = <optimized out>
+  #1  0x00007fb2aa672abf in __psm_ep_open_internal (
+      unique_job_key=unique_job_key@entry=0x7ffed1ee5800 "<\207\020#5\271\267\200\354x\242e8\364zo", 
+      devid_enabled=devid_enabled@entry=0x7ffed1ee5724, opts_i=opts_i@entry=0x7ffed1ee5810, mq=<optimized out>, 
+      epo=epo@entry=0x7ffed1ee5710, epido=epido@entry=0x7ffed1ee5708) at psm_ep.c:929
+	  ep = 0x1fc6af8
+	  num_units = 1
+	  len = <optimized out>
+	  err = <optimized out>
+	  epaddr = 0x1e9dd78
+	  buf = "miriel044:2.0.", '\000' <repeats 113 times>
+	  p = <optimized out>
+	  e = <optimized out>
+	  old_cpuaff = 0x0
+	  old_unit = 0x0
+	  yield_cnt = {e_void = 0xfa, e_str = 0xfa <error: Cannot access memory at address 0xfa>, e_int = 250, 
+	    e_uint = 250, e_long = 250, e_ulong = 250, e_ulonglong = 250}
+	  no_cpuaff = {e_void = 0x0, e_str = 0x0, e_int = 0, e_uint = 0, e_long = 0, e_ulong = 0, e_ulonglong = 0}
+	  env_unit_id = {e_void = 0xffffffffffffffff, 
+	    e_str = 0xffffffffffffffff <error: Cannot access memory at address 0xffffffffffffffff>, e_int = -1, 
+	    e_uint = 4294967295, e_long = -1, e_ulong = 18446744073709551615, e_ulonglong = 18446744073709551615}
+	  env_port_id = {e_void = 0x0, e_str = 0x0, e_int = 0, e_uint = 0, e_long = 0, e_ulong = 0, e_ulonglong = 0}
+	  env_sl = {e_void = 0x0, e_str = 0x0, e_int = 0, e_uint = 0, e_long = 0, e_ulong = 0, e_ulonglong = 0}
+	  ptl_sizes = <optimized out>
+	  default_cpuaff = <optimized out>
+	  opts = {timeout = 180000000000, unit = -1, affinity = 0, shm_mbytes = 10, sendbufs_num = 1024, 
+	    network_pkey = 65535, port = 0, outsl = 0, service_id = 1152940698815692800, 
+	    path_res_type = PSM_PATH_RES_NONE, senddesc_num = 4096, imm_size = 128}
+	  amsh_ptl = 0x1fc6e48
+	  ips_ptl = 0x1fc6f88
+	  self_ptl = 0x1fc99c8
+	  i = 3
+
+It looks like ptl.c:224 is writing past the region that was malloc'd.
+
+Turning stats off solves the problem.
+
+diff --git a/psm_utils.c b/psm_utils.c
+index c8651fe..5514921 100644
+--- a/psm_utils.c
++++ b/psm_utils.c
+@@ -1058,7 +1058,7 @@ psmi_log_memstats(psmi_memtype_t type, int64_t nbytes)
+     return;
+ }
+ 
+-#define psmi_stats_mask PSMI_STATSTYPE_MEMORY
++#define psmi_stats_mask 0
+ 
+ #ifdef malloc
+ #undef malloc