New ondisk PFS data after hammer pfs-destroy

Tomohiro Kusumi kusumi.tomohiro at gmail.com
Wed Aug 17 07:14:27 PDT 2016


This probably isn't a bug, but it still seems weird in terms of the
filesystem design.

--
1) Create fs, nothing strange in hammer show result

# newfs_hammer -L TEST /dev/da1 /dev/da2 /dev/da3
# mount_hammer /dev/da1:/dev/da2:/dev/da3 /HAMMER
# hammer -f /dev/da1:/dev/da2:/dev/da3 show | less
      1 Volume header   next_tid=0000000100000000
      2                 bufoffset=0000000014040000
      3                 zone 0  next_offset=0000000000000000
      4                 zone 1  next_offset=0000000000000000
      5                 zone 2  next_offset=0000000000000000
      6                 zone 3  next_offset=3000000000000000
      7                 zone 4  next_offset=2000000000000000
      8                 zone 5  next_offset=0000000000000000
      9                 zone 6  next_offset=0000000000000000
     10                 zone 7  next_offset=0000000000000000
     11                 zone 8  next_offset=8000000020801000
     12                 zone 9  next_offset=90000000210001b0
     13                 zone 10 next_offset=a000000000000000
     14                 zone 11 next_offset=b000000000000000
     15                 zone 12 next_offset=c000000000000000
     16                 zone 13 next_offset=d000000000000000
     17                 zone 14 next_offset=e000000000000000
     18                 zone 15 next_offset=f000000000000000
     19      NODE 8000000020800000 cnt=02 p=0000000000000000 type=L
depth=0 mirror=0000000000000000 fill=z8:v0:0:65:0=1% {
     20 G------ ELM  0 R lo=00000001 obj=0000000000000001 rt=01
key=0000000000000000 tid=0000000100000001
     21                  del=0000000000000000 ot=01
dataoff=9000000021000000/128 crc=c24e8742 fill=z9:v0:0:66:0=1%
     22                  inode size=0 nlinks=1 mode=00755
uflags=00000000 caps=05 pobjid=0000000000000000 ot=01
     23                        ctime=00053a445bdbcae1
mtime=00053a445bdbcae1 atime=00053a445bdbcae1
     24 G------ ELM  1 R lo=00000002 obj=0000000000000001 rt=15
key=0000000000000000 tid=0000000100000001
     25                  del=0000000000000000 ot=00
dataoff=9000000021000080/292 crc=9a311d2a fill=z9:v0:0:66:128=1%
     26                  pfs sync_beg_tid=0000000000000000
sync_end_tid=0000000000000000
     27                      shared_uuid=a6e96834-647d-11e6-91e2-75d435a9db68
     28                      unique_uuid=a6e96834-647d-11e6-91e2-75d435a9db68
     29                      mirror_flags=00000000 label="TEST"
     30      }

--
2) Create PFS#1, nothing strange in hammer show result

# hammer pfs-master a
# sync; sync; ...
# hammer -f /dev/da1:/dev/da2:/dev/da3 show | less
      1 Volume header   next_tid=0000000100008040
      2                 bufoffset=0000000014040000
      3                 zone 0  next_offset=0000000000000000
      4                 zone 1  next_offset=0000000000000000
      5                 zone 2  next_offset=0000000000000000
      6                 zone 3  next_offset=3000000000001500
      7                 zone 4  next_offset=2000000000000000
      8                 zone 5  next_offset=0000000000000000
      9                 zone 6  next_offset=0000000000000000
     10                 zone 7  next_offset=0000000000000000
     11                 zone 8  next_offset=8000000020801000
     12                 zone 9  next_offset=90000000210004a0
     13                 zone 10 next_offset=a000000000000000
     14                 zone 11 next_offset=b000000000000000
     15                 zone 12 next_offset=c000000000000000
     16                 zone 13 next_offset=d000000000000000
     17                 zone 14 next_offset=e000000000000000
     18                 zone 15 next_offset=f000000000000000
     19      NODE 8000000020800000 cnt=06 p=0000000000000000 type=L
depth=0 mirror=0000000100008020 fill=z8:v0:0:65:0=1% {
     20 G------ ELM  0 R lo=00000001 obj=0000000000000001 rt=01
key=0000000000000000 tid=0000000100000001
     21                  del=0000000000000000 ot=01
dataoff=9000000021000000/128 crc=c24e8742 fill=z9:v0:0:66:0=1%
     22                  inode size=0 nlinks=1 mode=00755
uflags=00000000 caps=05 pobjid=0000000000000000 ot=01
     23                        ctime=00053a445bdbcae1
mtime=00053a445feba105 atime=00053a445bdbcae1
     24 G------ ELM  1 R lo=00000001 obj=0000000000000001 rt=11
key=68b7be4356f40000 tid=0000000100008020
     25                  del=0000000000000000 ot=07
dataoff=9000000021000350/17 crc=7add08fb fill=z9:v0:0:66:848=1%
     26                  dir-entry ino=0000000100000745 lo=00000000 name="a"
     27 G------ ELM  2 R lo=00000001 obj=0000000100000745 rt=01
key=0000000000000000 tid=0000000100008020
     28                  del=0000000000000000 ot=07
dataoff=9000000021000250/128 crc=eff29666 fill=z9:v0:0:66:592=1%
     29                  inode size=10 nlinks=1 mode=00755
uflags=00000000 caps=00 pobjid=0000000000000001 ot=07
     30                        ctime=00053a445feba105
mtime=00053a445feba105 atime=00053a445feba105 symlink="@@PFS00001"
     31 G------ ELM  3 R lo=00000002 obj=0000000000000001 rt=15
key=0000000000000000 tid=0000000100000001
     32                  del=0000000000000000 ot=00
dataoff=9000000021000080/292 crc=9a311d2a fill=z9:v0:0:66:128=1%
     33                  pfs sync_beg_tid=0000000000000000
sync_end_tid=0000000000000000
     34                      shared_uuid=a6e96834-647d-11e6-91e2-75d435a9db68
     35                      unique_uuid=a6e96834-647d-11e6-91e2-75d435a9db68
     36                      mirror_flags=00000000 label="TEST"
     37 G------ ELM  4 R lo=00000002 obj=0000000000000001 rt=15
key=0000000000010000 tid=0000000100008020
     38                  del=0000000000000000 ot=01
dataoff=9000000021000370/292 crc=30dbd6e8 fill=z9:v0:0:66:880=1%
     39                  pfs sync_beg_tid=0000000000000001
sync_end_tid=0000000100000000
     40                      shared_uuid=d2b4be0a-647d-11e6-91e2-75d435a9db68
     41                      unique_uuid=d2b4be10-647d-11e6-91e2-75d435a9db68
     42                      mirror_flags=00000000 label=""
     43 G------ ELM  5 R lo=00010001 obj=0000000000000001 rt=01
key=0000000000000000 tid=0000000100008020
     44                  del=0000000000000000 ot=01
dataoff=90000000210002d0/128 crc=3adf14e0 fill=z9:v0:0:66:720=1%
     45                  inode size=0 nlinks=1 mode=00755
uflags=00000000 caps=05 pobjid=0000000000000000 ot=01
     46                        ctime=00053a445feba105
mtime=00053a445feba105 atime=00053a445feba105
     47      }

--
3) Destroy PFS#1
Now the inode and directory entry for that PFS symlink are marked with
the delete flag.
The ondisk PFS data for PFS#1 is also marked with the delete flag.
The root inode of PFS#1 is physically destroyed by
hammer_pfs_delete_at_cursor(), via ioctl(HAMMERIOC_RMR_PSEUDOFS).
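
To be explicit about the terminology: "marked with the delete flag"
means the B-Tree element stays in place but gets a nonzero delete TID
(shown as "d del=<tid>" by hammer show), whereas "physically destroyed"
means the element is removed from the B-Tree altogether.  As a minimal
sketch, with made-up names rather than the actual HAMMER structures:

    #include <stdint.h>

    struct fake_btree_elm {
            uint64_t create_tid;    /* "tid=" in the hammer show output */
            uint64_t delete_tid;    /* "del=" in the output, 0 == live */
    };

    /* delete-marked: still present in the B-Tree, just no longer live */
    static int
    elm_is_delete_marked(const struct fake_btree_elm *elm)
    {
            return (elm->delete_tid != 0);
    }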

These are all fine, but there is new ondisk PFS data for PFS#1 (at
ELM5) without the delete flag set.
This comes from the part of hammer_cmd_pseudofs_destroy() in
sbin/hammer/cmd_pfs.c quoted at the end of this mail.

# hammer pfs-destroy a
# sync; sync; ...
# hammer -f /dev/da1:/dev/da2:/dev/da3 show | less
      1 Volume header   next_tid=00000001000080e0
      2                 bufoffset=0000000014040000
      3                 zone 0  next_offset=0000000000000000
      4                 zone 1  next_offset=0000000000000000
      5                 zone 2  next_offset=0000000000000000
      6                 zone 3  next_offset=3000000000003e98
      7                 zone 4  next_offset=2000000000000000
      8                 zone 5  next_offset=0000000000000000
      9                 zone 6  next_offset=0000000000000000
     10                 zone 7  next_offset=0000000000000000
     11                 zone 8  next_offset=8000000020801000
     12                 zone 9  next_offset=90000000210005d0
     13                 zone 10 next_offset=a000000000000000
     14                 zone 11 next_offset=b000000000000000
     15                 zone 12 next_offset=c000000000000000
     16                 zone 13 next_offset=d000000000000000
     17                 zone 14 next_offset=e000000000000000
     18                 zone 15 next_offset=f000000000000000
     19      NODE 8000000020800000 cnt=06 p=0000000000000000 type=L
depth=0 mirror=00000001000080c0 fill=z8:v0:0:65:0=1% {
     20 G------ ELM  0 R lo=00000001 obj=0000000000000001 rt=01
key=0000000000000000 tid=0000000100000001
     21                  del=0000000000000000 ot=01
dataoff=9000000021000000/128 crc=c24e8742 fill=z9:v0:0:66:0=1%
     22                  inode size=0 nlinks=1 mode=00755
uflags=00000000 caps=05 pobjid=0000000000000000 ot=01
     23                        ctime=00053a445bdbcae1
mtime=00053a44641acbfc atime=00053a445bdbcae1
     24 G------ ELM  1 R lo=00000001 obj=0000000000000001 rt=11
key=68b7be4356f40000 tid=0000000100008020
     25                d del=0000000100008060 ot=07
dataoff=9000000021000350/17 crc=7add08fb fill=z9:v0:0:66:848=1%
     26                  dir-entry ino=0000000100000745 lo=00000000 name="a"
     27 G------ ELM  2 R lo=00000001 obj=0000000100000745 rt=01
key=0000000000000000 tid=0000000100008020
     28                d del=0000000100008060 ot=07
dataoff=9000000021000250/128 crc=eff29666 fill=z9:v0:0:66:592=1%
     29                  inode size=10 nlinks=1 mode=00755
uflags=00000000 caps=00 pobjid=0000000000000001 ot=07
     30                        ctime=00053a445feba105
mtime=00053a445feba105 atime=00053a445feba105 symlink="@@PFS00001"
     31 G------ ELM  3 R lo=00000002 obj=0000000000000001 rt=15
key=0000000000000000 tid=0000000100000001
     32                  del=0000000000000000 ot=00
dataoff=9000000021000080/292 crc=9a311d2a fill=z9:v0:0:66:128=1%
     33                  pfs sync_beg_tid=0000000000000000
sync_end_tid=0000000000000000
     34                      shared_uuid=a6e96834-647d-11e6-91e2-75d435a9db68
     35                      unique_uuid=a6e96834-647d-11e6-91e2-75d435a9db68
     36                      mirror_flags=00000000 label="TEST"
     37 G------ ELM  4 R lo=00000002 obj=0000000000000001 rt=15
key=0000000000010000 tid=0000000100008020
     38                d del=00000001000080c0 ot=01
dataoff=9000000021000370/292 crc=30dbd6e8 fill=z9:v0:0:66:880=1%
     39                  pfs sync_beg_tid=0000000000000001
sync_end_tid=0000000100000000
     40                      shared_uuid=d2b4be0a-647d-11e6-91e2-75d435a9db68
     41                      unique_uuid=d2b4be10-647d-11e6-91e2-75d435a9db68
     42                      mirror_flags=00000000 label=""
     43 G------ ELM  5 R lo=00000002 obj=0000000000000001 rt=15
key=0000000000010000 tid=00000001000080c0
     44                  del=0000000000000000 ot=01
dataoff=90000000210004a0/292 crc=ef56173e fill=z9:v0:0:66:1184=1%
     45                  pfs sync_beg_tid=0000000000000001
sync_end_tid=0000000000000001
     46                      shared_uuid=d2b4be0a-647d-11e6-91e2-75d435a9db68
     47                      unique_uuid=d2b4be10-647d-11e6-91e2-75d435a9db68
     48                      mirror_flags=80000001 label=""
     49      }


Why do we need this new ondisk PFS#1 data at ELM5 when we're getting
rid of it (unless the attempt to destroy PFS#1 happens to fail partway)?
Given that PFS#1 is physically gone, it's weird that ondisk PFS#1 data
is still there under PFS#0, alongside the ondisk PFS#1 data that has
the delete flag set.
Since the new ondisk PFS#1 data appears after the delete-marked one
within the node, a B-Tree lookup doesn't mistakenly return ELM5 as an
effective, undeleted PFS, but it's still confusing because nothing ever
seems to refer to it.

sbin/hammer/cmd_pfs.c
 ...
 404         /*
 405          * Set the sync_beg_tid and sync_end_tid's to 1, once we start the
 406          * RMR the PFS is basically destroyed even if someone ^C's it.
 407          */
 408         pfs.ondisk->mirror_flags |= HAMMER_PFSD_SLAVE;
 409         pfs.ondisk->reserved01 = -1;
 410         pfs.ondisk->sync_beg_tid = 1;
 411         pfs.ondisk->sync_end_tid = 1;
 412
 413         if (ioctl(fd, HAMMERIOC_SET_PSEUDOFS, &pfs) < 0) {
 414                 fprintf(stderr, "Unable to update the PFS configuration: %s\n",
 415                         strerror(errno));
 416                 exit(1);
 417         }
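
For context, here is roughly how that snippet sits within
hammer_cmd_pseudofs_destroy(), reconstructed from the behavior shown
above.  This is a simplified sketch, not the actual code; the exact
ordering, the error handling and the point where the softlink is
removed are assumptions:

    #include <sys/types.h>
    #include <sys/ioctl.h>
    #include <err.h>
    #include <stdio.h>
    #include <vfs/hammer/hammer_ioctl.h>    /* header path assumed */

    /*
     * fd references the PFS, path is the softlink created by
     * "hammer pfs-master", pfs was filled in by HAMMERIOC_GET_PSEUDOFS.
     */
    static void
    pseudofs_destroy_sketch(int fd, const char *path,
                            struct hammer_ioc_pseudofs_rw *pfs)
    {
            /*
             * The quoted snippet: rewriting the PFS record through
             * HAMMERIOC_SET_PSEUDOFS delete-marks the old ondisk PFS
             * data (ELM4, del=00000001000080c0) and lays down a fresh
             * copy at the new TID (ELM5, tid=00000001000080c0) carrying
             * the slave flag and sync TIDs of 1, so the PFS is treated
             * as destroyed even if the command gets interrupted.
             */
            pfs->ondisk->mirror_flags |= HAMMER_PFSD_SLAVE;
            pfs->ondisk->reserved01 = -1;
            pfs->ondisk->sync_beg_tid = 1;
            pfs->ondisk->sync_end_tid = 1;
            if (ioctl(fd, HAMMERIOC_SET_PSEUDOFS, pfs) < 0)
                    err(1, "HAMMERIOC_SET_PSEUDOFS");

            /*
             * Physically destroy the PFS; in the kernel this goes
             * through hammer_pfs_delete_at_cursor() and removes the
             * PFS root inode.
             */
            if (ioctl(fd, HAMMERIOC_RMR_PSEUDOFS, pfs) < 0)
                    err(1, "HAMMERIOC_RMR_PSEUDOFS");

            /*
             * Remove the @@PFS softlink, which delete-marks its inode
             * and directory entry under PFS#0 (ELM1/ELM2 above).
             */
            if (remove(path) < 0)
                    warn("remove %s", path);
    }

In other words, ELM5 appears to be just the updated version of the same
PFS record written out by the HAMMERIOC_SET_PSEUDOFS call; note that
ELM4's delete TID and ELM5's create TID are both 00000001000080c0.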


