/*
 * nasd_od.h
 *
 * Structures for on-disk NASD filesystem
 *
 * Author: Jim Zelenka
 */
/*
 * Copyright (c) of Carnegie Mellon University, 1997,1998,1999.
 *
 * Permission to reproduce, use, and prepare derivative works of
 * this software for internal use is granted provided the copyright
 * and "No Warranty" statements are included with all reproductions
 * and derivative works. This software may also be redistributed
 * without charge provided that the copyright and "No Warranty"
 * statements are included in all redistributions.
 *
 * NO WARRANTY. THIS SOFTWARE IS FURNISHED ON AN "AS IS" BASIS.
 * CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER
 * EXPRESSED OR IMPLIED AS TO THE MATTER INCLUDING, BUT NOT LIMITED
 * TO: WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY
 * OF RESULTS OR RESULTS OBTAINED FROM USE OF THIS SOFTWARE. CARNEGIE
 * MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT
 * TO FREEDOM FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
 */


#ifndef _NASD_OD_H_
#define _NASD_OD_H_

#include <nasd/nasd_options.h>
#include <nasd/nasd_drive_options.h>
#include <nasd/nasd_general.h>
#include <nasd/nasd_itypes.h>
#include <nasd/nasd_types.h>

/*
 * Sector geometry: size of one sector in bytes, and its log2.
 */
#define NASD_OD_SECT_SHIFT 9
#define NASD_OD_SECT_SIZE  512

/*
 * Block geometry: size of one basic block in bytes, and its log2.
 */
#define NASD_OD_BLOCK_SHIFT     13
#define NASD_OD_BASIC_BLOCKSIZE 8192

/* Number of sectors making up one basic block (8192 / 512 = 16). */
#define NASD_OD_SECTORS_PER_BLK \
  (NASD_OD_BASIC_BLOCKSIZE / NASD_OD_SECT_SIZE)

/*
 * Scalar types used throughout the on-disk structures.
 *
 * Physical quantities (block/sector numbers and counts, node numbers,
 * reference counts) are built on nasd_uint32; per-object block numbers
 * and counts are built on nasd_int64.
 */
typedef nasd_uint32   nasd_akvers_t;            /* auth key version */
typedef nasd_uint32   nasd_blkcnt_t;            /* count of blocks */
typedef nasd_uint32   nasd_blkno_t;             /* block number on physical disk */
typedef nasd_uint16   nasd_generation_t;        /* generation number */
typedef nasd_uint32   nasd_nodenum_t;           /* index to nasd node in fs */
typedef nasd_int64    nasd_oblkcnt_t;           /* object block count */
typedef nasd_int64    nasd_oblkno_t;            /* object block number */
typedef nasd_uint32   nasd_refcnt_t;            /* reference count */
typedef nasd_uint32   nasd_sectcnt_t;           /* count of sectors */
typedef nasd_uint32   nasd_sectno_t;            /* physical sector number (0-based) */

/*
 * Null values: zero, cast to the block-number and node-number types.
 */
#define NASD_NULL_BLKNO ((nasd_blkno_t)0)
#define NASD_NULL_NODE  ((nasd_nodenum_t)0)

/*
 * Placeholder value- indicates that a slot is reserved, and someone
 * is about to fill it in with real data (used so locks on slot can
 * be released while value is obtained)
 *
 * The value is all-ones in 32 bits, cast to the block-number type.
 */
#define NASD_CR_BLKNO   ((nasd_blkno_t)0xffffffff)

/*
 * Typedef names for the on-disk structures whose bodies are defined
 * later in this header.
 */
typedef struct nasd_od_disk_s    nasd_od_disk_t;
typedef struct nasd_od_extent_s  nasd_od_extent_t;
typedef struct nasd_od_node_s    nasd_od_node_t;
typedef struct nasd_od_part_s    nasd_od_part_t;
typedef struct nasd_od_pte_s     nasd_od_pte_t;

/*
 * Format of a NASD identifier as used by this drive:
 * bits 00..31: node number
 * bits 32..52: lvl-2 hint (!!! only 10 bits here)
 * bits 53..56: partition number
 * bits 57..63: generation number
 *
 * If this formatting changes, nasd_decompose.c, nasd_control.c,
 * and nasd_obj.c will all require changes.
 *
 * NOTE(review): bits 32..52 span 21 bits, which matches
 * NASD_MAX_BLK_HINTBITS, so the "only 10 bits" remark is either stale
 * or refers to how many of those bits are actually used -- confirm
 * against the files named above.
 *
 * NASD_OD_GEN_MASK is a 7-bit mask, the same width as the generation
 * field (bits 57..63).
 */
#define NASD_OD_GEN_MASK 0x7f

/*
 * Maximum number of bits available in a NASD id to store
 * a hint about a block number
 */
#define NASD_MAX_BLK_HINTBITS 21

/*
 * 2^NASD_MAX_BLK_HINTBITS: the number of distinct values a block hint
 * can take (one more than the largest value that fits in the hint).
 *
 * This used to be written in terms of NASD_MAX_NPT_HINTBITS, a name
 * this header never defines, so any use of the macro depended on some
 * other header happening to supply it. It is now derived from the
 * bit count defined directly above.
 */
#define NASD_MAX_BLK_HINT_BLKNUM (1<<NASD_MAX_BLK_HINTBITS)

/*
 * Generic layout info:
 *
 * Everything is managed in BASIC_BLOCKSIZE sized blocks.
 * However, the first such block starts at sector number
 * nasd_firstblock_offset. Preceding sectors store things
 * like the disk structure itself, and maybe the refcnts or
 * something. Also, the disk structure is duplicated at the
 * end of the disk, because hey, if you lose that, you are
 * _so_ doomed.
 */

/*
 * nasd_od_pte (8 bytes)
 *
 * Entry in the node pagetable.
 *
 * Two 32-bit fields: the block number holding the node, and the
 * upper half of the 64-bit NASD identifier for that node.
 */
struct nasd_od_pte_s {
  nasd_blkno_t  blkno;    /* block number of node */
  nasd_uint32   highbits; /* high 32 bits of nasdid */
};

/*
 * nasd_od_extent (8 bytes)
 *
 * Used to represent a range of blocks on disk, as a pair of
 * block numbers.
 */
struct nasd_od_extent_s {
  nasd_blkno_t  first;    /* first block of the range */
  nasd_blkno_t  last;     /* last block of the range (presumably inclusive -- TODO confirm) */
};

/*
 * nasd_od_indirect_ptr (4 bytes)
 *
 * An indirect pointer carries only a block number.
 */
typedef struct nasd_od_indirect_ptr_s {
  nasd_blkno_t  blkno;    /* block being pointed to */
} nasd_od_indirect_ptr_t;

#if NASD_OD_EXT_PTR > 0

/*
 * nasd_od_direct_ptr (20 bytes)
 *
 * Extended direct pointer (NASD_OD_EXT_PTR > 0): a block number plus
 * a digest.
 *
 * NOTE(review): the generated layout parameters for this variant use
 * NASD_OD_PTR0_SIZE 24, not 20. The 20 above may be stale, or may not
 * account for padding -- confirm against sizeof(nasd_digest_t).
 */
typedef struct nasd_od_direct_ptr_s {
  nasd_blkno_t  blkno;    /* block being pointed to */
  nasd_digest_t digest;   /* digest stored alongside the block number */
} nasd_od_direct_ptr_t;


/*
 * (generated by /usr16/jimz/nasd/drive/blockparam.c)
 *
 * Node layout:
 *  FFS-style n-level indirect.
 *
 * Direct pointer size:          24
 * Indirect pointer size:        4
 * Space available for pointing: 7828 (1957 ipointers, 326 dpointers)
 * Spare space:                  0 bytes (0 ipointers, 0 dpointers)
 * Pointers at level 0:          228
 * Pointers at level 1:          476
 * Pointers at level 2:          64
 * Pointers at level 3:          32
 * Pointers at level 4:          17
 * Bytes pointed to at level 0: 1867776
 * Bytes pointed to at level 1: 1329692672
 * Bytes pointed to at level 2: 366145961984
 * Bytes pointed to at level 3: 374933465071616
 * Bytes pointed to at level 4: 407927609997918208
 * Indirect levels for   2^32 coverage: 2
 * Indirect levels for   2^33 coverage: 2
 * Indirect levels for 0.5 TB coverage: 3
 * Indirect levels for 1.0 TB coverage: 3
 * Indirect levels for 3.0 TB coverage: 3
 * Indirect levels for 1.0 PB coverage: 4
 * Indirect levels for 8.0 PB coverage: 4
 * 1.0 EB coverage exceeds pointing capacity
 * 4.0 EB coverage exceeds pointing capacity
 *   2^63 coverage exceeds pointing capacity
 *
 * Reading the constants below:
 *  - NASD_OD_LVLn_PTRS is the number of pointers held in the node for
 *    level n; NASD_OD_LVLn_POINTED is the number of blocks reachable
 *    through them (e.g. 228 blocks * 8192 = 1867776 bytes at level 0).
 *  - NASD_OD_NODE_PTRS0 is the level-0 count; NASD_OD_NODE_PTRS is the
 *    sum over levels 1..4 (476+64+32+17 = 589).
 *  - NASD_OD_PTR_BYTES = 228*24 + 589*4 = 7828.
 *
 * These values are generator output; regenerate rather than hand-edit.
 */
#define NASD_OD_LVL0_PTRS    228
#define NASD_OD_LVL0_POINTED nasd_int64cast(228)
#define NASD_OD_LVL1_PTRS    476
#define NASD_OD_LVL1_POINTED nasd_int64cast(162316)
#define NASD_OD_LVL2_PTRS    64
#define NASD_OD_LVL2_POINTED nasd_int64cast(44695552)
#define NASD_OD_LVL3_PTRS    32
#define NASD_OD_LVL3_POINTED nasd_int64cast(45768245248)
#define NASD_OD_LVL4_PTRS    17
#define NASD_OD_LVL4_POINTED nasd_int64cast(49795850829824)
#define NASD_OD_ILVLS           5
#define NASD_OD_NODE_PTRS0      228
#define NASD_OD_NODE_PTRS       589
#define NASD_OD_MAX_OBJ_LEN     nasd_uint64cast(408302910940512256)
#define NASD_OD_PTR0_SIZE       24
#define NASD_OD_PTR_SIZE        4
#define NASD_OD_PTR_BYTES       7828
#define NASD_OD_PTR_SPARE_BYTES 0
/*                  2^64-1 = 18446744073709551615 (now you can stop asking :-) */
/*
 * Per-level lookup tables, indexed by indirection level
 * (0 .. NASD_OD_ILVLS-1). Only emitted when the includer sets
 * NASD_OD_INCLUDE_COUNTS > 0; being static, each such translation
 * unit gets its own copy.
 *
 *  nasd_od_ilvl_top_ptrs  pointers held in the node at each level
 *  nasd_od_ilvl_ptrs      blocks reachable at each level
 *                         (same values as NASD_OD_LVLn_POINTED)
 *  nasd_od_ilvl_ptr_psum  running byte total of the pointers of all
 *                         lower levels (228*24 = 5472, +476*4 = 7376, ...);
 *                         presumably the offset of each level within
 *                         the node's pointer area -- TODO confirm
 *  nasd_od_ilvl_leaves    blocks covered by one pointer at each level
 *                         (1, 341, 341*2048, ...)
 *  nasd_od_ilvl_offset    running total of bytes covered by all lower
 *                         levels
 */
#if NASD_OD_INCLUDE_COUNTS > 0
static nasd_oblkno_t nasd_od_ilvl_top_ptrs[NASD_OD_ILVLS] = 
  {228, 476, 64, 32, 17};
static nasd_oblkno_t nasd_od_ilvl_ptrs[NASD_OD_ILVLS] = 
  {nasd_int64cast(228), nasd_int64cast(162316), nasd_int64cast(44695552), nasd_int64cast(45768245248), nasd_int64cast(49795850829824)};
static nasd_oblkno_t nasd_od_ilvl_ptr_psum[NASD_OD_ILVLS] = 
  {0, 5472, 7376, 7632, 7760};
static nasd_oblkno_t nasd_od_ilvl_leaves[NASD_OD_ILVLS] = 
  {nasd_int64cast(1), nasd_int64cast(341), nasd_int64cast(698368), nasd_int64cast(1430257664), nasd_int64cast(2929167695872)};
static nasd_offset_t nasd_od_ilvl_offset[NASD_OD_ILVLS] = {
  nasd_int64cast(0),
  nasd_int64cast(1867776),
  nasd_int64cast(1331560448),
  nasd_int64cast(367477522432),
  nasd_int64cast(375300942594048)};
#endif /* NASD_OD_INCLUDE_COUNTS > 0 */

#else /* NASD_OD_EXT_PTR > 0 */

/*
 * nasd_od_direct_ptr (4 bytes)
 *
 * Plain direct pointer (NASD_OD_EXT_PTR not > 0): a block number only.
 */
typedef struct nasd_od_direct_ptr_s {
  nasd_blkno_t  blkno;    /* block being pointed to */
} nasd_od_direct_ptr_t;

/*
 * (generated by /usr16/jimz/nasd/ondisk/blockparam.c)
 *
 * Node layout:
 *  FFS-style n-level indirect.
 *
 * Direct pointer size:          4
 * Indirect pointer size:        4
 * Space available for pointing: 7828 (1957 ipointers, 1957 dpointers)
 * Spare space:                  0 bytes (0 ipointers, 0 dpointers)
 * Pointers at level 0:          512
 * Pointers at level 1:          512
 * Pointers at level 2:          360
 * Pointers at level 3:          512
 * Pointers at level 4:          61
 * Bytes pointed to at level 0: 4194304
 * Bytes pointed to at level 1: 8589934592
 * Bytes pointed to at level 2: 12369505812480
 * Bytes pointed to at level 3: 36028797018963968
 * Bytes pointed to at level 4: 8791026472627208192
 * Indirect levels for   2^32 coverage: 1
 * Indirect levels for   2^33 coverage: 1
 * Indirect levels for 0.5 TB coverage: 2
 * Indirect levels for 1.0 TB coverage: 2
 * Indirect levels for 3.0 TB coverage: 2
 * Indirect levels for 1.0 PB coverage: 3
 * Indirect levels for 8.0 PB coverage: 3
 * Indirect levels for 1.0 EB coverage: 4
 * Indirect levels for 4.0 EB coverage: 4
 *   2^63 coverage exceeds pointing capacity
 *
 * Reading the constants below:
 *  - NASD_OD_LVLn_PTRS is the number of pointers held in the node for
 *    level n; NASD_OD_LVLn_POINTED is the number of blocks reachable
 *    through them (e.g. 512 blocks * 8192 = 4194304 bytes at level 0).
 *  - NASD_OD_NODE_PTRS0 is the level-0 count; NASD_OD_NODE_PTRS is the
 *    sum over levels 1..4 (512+360+512+61 = 1445).
 *  - NASD_OD_PTR_BYTES = (512 + 1445) * 4 = 7828.
 *
 * These values are generator output; regenerate rather than hand-edit.
 */
#define NASD_OD_LVL0_PTRS    512
#define NASD_OD_LVL0_POINTED nasd_int64cast(512)
#define NASD_OD_LVL1_PTRS    512
#define NASD_OD_LVL1_POINTED nasd_int64cast(1048576)
#define NASD_OD_LVL2_PTRS    360
#define NASD_OD_LVL2_POINTED nasd_int64cast(1509949440)
#define NASD_OD_LVL3_PTRS    512
#define NASD_OD_LVL3_POINTED nasd_int64cast(4398046511104)
#define NASD_OD_LVL4_PTRS    61
#define NASD_OD_LVL4_POINTED nasd_int64cast(1073123348709376)
#define NASD_OD_ILVLS           5
#define NASD_OD_NODE_PTRS0      512
#define NASD_OD_NODE_PTRS       1445
#define NASD_OD_MAX_OBJ_LEN     nasd_uint64cast(8827067647746113536)
#define NASD_OD_PTR0_SIZE       4
#define NASD_OD_PTR_SIZE        4
#define NASD_OD_PTR_BYTES       7828
#define NASD_OD_PTR_SPARE_BYTES 0
/*                  2^64-1 = 18446744073709551615 (now you can stop asking :-) */
/*
 * Per-level lookup tables, indexed by indirection level
 * (0 .. NASD_OD_ILVLS-1). Only emitted when the includer sets
 * NASD_OD_INCLUDE_COUNTS > 0; being static, each such translation
 * unit gets its own copy.
 *
 *  nasd_od_ilvl_top_ptrs  pointers held in the node at each level
 *  nasd_od_ilvl_ptrs      blocks reachable at each level
 *                         (same values as NASD_OD_LVLn_POINTED)
 *  nasd_od_ilvl_ptr_psum  running byte total of the pointers of all
 *                         lower levels (512*4 = 2048, +512*4 = 4096, ...);
 *                         presumably the offset of each level within
 *                         the node's pointer area -- TODO confirm
 *  nasd_od_ilvl_leaves    blocks covered by one pointer at each level
 *                         (1, 2048, 2048*2048, ...)
 *  nasd_od_ilvl_offset    running total of bytes covered by all lower
 *                         levels
 */
#if NASD_OD_INCLUDE_COUNTS > 0
static nasd_oblkno_t nasd_od_ilvl_top_ptrs[NASD_OD_ILVLS] = 
  {512, 512, 360, 512, 61};
static nasd_oblkno_t nasd_od_ilvl_ptrs[NASD_OD_ILVLS] = 
  {nasd_int64cast(512), nasd_int64cast(1048576), nasd_int64cast(1509949440),
   nasd_int64cast(4398046511104), nasd_int64cast(1073123348709376) };
static nasd_oblkno_t nasd_od_ilvl_ptr_psum[NASD_OD_ILVLS] = 
  {0, 2048, 4096, 5536, 7584};
static nasd_oblkno_t nasd_od_ilvl_leaves[NASD_OD_ILVLS] = 
  {1, 2048, 4194304, nasd_int64cast(8589934592),
   nasd_int64cast(17592186044416) };
static nasd_offset_t nasd_od_ilvl_offset[NASD_OD_ILVLS] = {
  nasd_int64cast(0),
  nasd_int64cast(4194304),
  nasd_int64cast(8594128896),
  nasd_int64cast(12378099941376),
  nasd_int64cast(36041175118905344)};
#endif /* NASD_OD_INCLUDE_COUNTS > 0 */

#endif /* NASD_OD_EXT_PTR > 0 */

/*
 * How many of each fixed-size item fit in one basic block:
 * reference counts, indirect pointers, and direct pointers.
 * The refcount form involves sizeof, so it has type size_t and
 * cannot be used in #if.
 */
#define NASD_OD_REFS_PER_BLOCK  (NASD_OD_BASIC_BLOCKSIZE/sizeof(nasd_refcnt_t))
#define NASD_OD_IPTRS_PER_BLOCK (NASD_OD_BASIC_BLOCKSIZE/NASD_OD_PTR_SIZE)
#define NASD_OD_DPTRS_PER_BLOCK (NASD_OD_BASIC_BLOCKSIZE/NASD_OD_PTR0_SIZE)

/*
 * Node pagetable entries per block, minus one.
 * Last one is not a node pointer, but a pointer to the next pagetable level
 */
#define NASD_OD_NODES_PER_NPT_BLOCK \
  ((NASD_OD_BASIC_BLOCKSIZE/sizeof(nasd_od_pte_t))-1)

/*
 * nasd_node (8192 bytes)
 *
 * Fundamental metadata unit for a NASD object.
 *
 * COW entire: a node that is COW-entire of another node has
 * been fastcopied from it and neither node has changed since.
 * This means that we haven't updated refcnts on the leaves, yet.
 * The original object in this chain has cow_next set to a non-NULL
 * node number. The duplicate object has cow_src set to the parent,
 * and NASD_ND_COW_ENTIRE set in the flags. The set of identical
 * objects is a doubly-linked list via the cow_next and cow_prev
 * pointers. All lists are circularly complete; thus, a node with
 * no extant entire copies has cow_next = cow_prev = object_num.
 *
 * This is an on-disk layout: do not reorder or resize fields.
 *
 * NOTE(review): the per-section byte counts in the comments below are
 * the original author's figures. They depend on types declared in
 * other headers and on compiler padding, and have not been re-derived
 * here.
 */
struct nasd_od_node_s {
  /* exported metadata: 316 bytes */
  nasd_blkcnt_t     blocks_allocated;        /* blocks actually used to store object */
  nasd_blkcnt_t     blocks_preallocated;     /* blocks allocated but not used to store data */
  nasd_uint64       object_len;              /* logical length of object */
  nasd_timespec_t   attr_modify_time;        /* when attributes last changed */
  nasd_timespec_t   object_modify_time;      /* when data last changed */
  nasd_timespec_t   object_create_time;      /* when object was created */
  nasd_timespec_t   fs_attr_modify_time;     /* when attributes last changed, fs modifiable */
  nasd_timespec_t   fs_object_modify_time;   /* when data last changed, fs modifiable */
  nasd_akvers_t     akvers;                  /* auth key version */
  unsigned char     fs_specific[NASD_FS_SPECIFIC_INFO_SIZE];
  /* internal metadata (other): 20 bytes */
  nasd_refcnt_t     refcnt;                  /* external references to inode (COW-entire) */
  nasd_uint32       flags;                   /* flags word (low four bits are partition number) */
  nasd_nodenum_t    cow_src;                 /* COW src object */
  nasd_nodenum_t    cow_next;                /* next object in COW-entire chain */
  nasd_nodenum_t    cow_prev;                /* prev object in COW-entire chain */
  /* internal metadata: 10 total (8-byte extent + 2-byte generation) */
  nasd_od_extent_t   prealloc_ex;            /* single extent of preallocated storage */
  nasd_generation_t  generation;             /* generation of node */
  /* nodenum (debugging): 4 bytes */
  nasd_nodenum_t     nodenum;
  /* disk blocks: 7828 bytes (NASD_OD_PTR_BYTES) */
  unsigned char      ptrs[NASD_OD_PTR_BYTES]; /* block pointers */
#if NASD_OD_PTR_SPARE_BYTES > 0
  /* extra blockpointer bytes (unused) (should be 0 bytes) */
  unsigned char      spare_ptrs[NASD_OD_PTR_SPARE_BYTES];
#endif /* NASD_OD_PTR_SPARE_BYTES > 0 */
  /* layout hints */
  nasd_blkno_t       last_block;             /* physical block of last logical block in object */
  /* spare: 6 bytes (the array length; this comment previously said 2) */
  char               spare[6];
};

/*
 * nasd_od_part (112 bytes)
 *
 * On-disk representation of info for one partition.
 * Do not monkey with ordering of struct elements!
 *
 * A part_size of zero marks the partition as invalid
 * (see NASD_OD_INVALID_PART).
 *
 * NOTE(review): the 112-byte figure depends on the sizes of
 * nasd_identifier_t, nasd_timespec_t and nasd_key_t, which are
 * declared in other headers; not re-derived here.
 */
struct nasd_od_part_s {
  nasd_identifier_t  first_obj;        /* id of "first" (automatic) object */
  nasd_timespec_t    last_cr_del;      /* last create or delete time */
  nasd_uint64        num_obj;          /* number of objects */
  nasd_blkcnt_t      part_size;        /* blocks allocated to partition */
  nasd_blkcnt_t      blocks_used;      /* blocks used by partition */
  nasd_blkcnt_t      blocks_allocated; /* blocks allocated in partition (includes prealloc) */
  nasd_uint16        generation;       /* node generation counter */
  nasd_uint16        min_protection;   /* Minimum security requirements */ 
  nasd_key_t         red_key;          /* short term key */
  nasd_key_t         black_key;        /* short term key */
  nasd_key_t         partition_key;    /* Partition control key */
  nasd_timespec_t    mod_time;         /* last modify time */
  nasd_timespec_t    create_time;      /* create time */
};

/* Number of partition slots carried in the on-disk disk structure. */
#define NASD_OD_MAXPARTS 4

/*
 * Nonzero when _pn_ lies in the range [0, NASD_OD_MAXPARTS).
 * The argument may be evaluated twice.
 */
#define NASD_OD_PARTNUM_VALID(_pn_) \
  (!(((_pn_) < 0) || ((_pn_) >= NASD_OD_MAXPARTS)))

/*
 * nasd_od_disk (NASD_OD_SECT_SIZE bytes)
 *
 * On-disk representation of info for a disk.
 *
 * This is an on-disk layout: do not reorder or resize fields. The
 * trailing spare[] array is what pads the structure out to its
 * intended size.
 *
 * NOTE(review): the per-field byte counts are the original author's
 * figures and have not been re-derived. In particular blocks_allocated
 * is annotated as 8 bytes although nasd_blkcnt_t is a nasd_uint32 in
 * this header. Also, `initialized` uses nasd_byte while `layout_type`
 * uses nasd_byte_t; both are declared elsewhere -- confirm the
 * difference is intentional.
 */
struct nasd_od_disk_s {
  nasd_timespec_t   mod_time;                     /* last-modified time             (  8 bytes) */
  nasd_timespec_t   format_time;                  /* time drive was formatted       (  8 bytes) */
  nasd_od_part_t    partitions[NASD_OD_MAXPARTS]; /* per-partition information      (448 bytes) */
  nasd_od_extent_t  npt_ext;                      /* node pagetable range           (  8 bytes) */
  nasd_od_extent_t  npt2_ext;                     /* node pagetable dup range       (  8 bytes) */
  nasd_blkcnt_t     blocks_allocated;             /* blocks allocated to partitions (  8 bytes) */
  nasd_key_t        master_key;                   /* Master Key                     ( 16 bytes) */
  nasd_key_t        drive_key;                    /* Drive Key                      ( 16 bytes) */
  nasd_byte         initialized;                  /* Is the drive initialized       (  1 byte)  */
  nasd_byte_t       layout_type;                  /* Layout algorithm used          (  1 byte)  */

  /*
   * You might need to customize this to match your
   * platform's rules for padding/alignment.
   */
  char              spare[23];

};

/*
 * Number of bytes of object data an "atomic" node can hold inline:
 * the same space a regular node spends on its block pointers.
 */
#define NASD_ND_ATOMIC_SIZE NASD_OD_PTR_BYTES

/*
 * Layout of the node flags word.
 */
#define NASD_ND_PARTMASK   0xf   /* bits 0..3: partition number */
#define NASD_ND_DELETE     0x10  /* bit 4: node in removal process */
#define NASD_ND_ATOMIC     0x20  /* bit 5: data contained in nasd_node */
#define NASD_ND_COW_ENTIRE 0x40  /* bit 6: node is a virgin copy of another */

/* Nonzero when the partition pointed to by _p_ has a part_size of zero. */
#define NASD_OD_INVALID_PART(_p_) (0 == (_p_)->part_size)

/* Partition number held in the flags word of the node pointed to by _np_. */
#define NASD_OD_PARTNUM(_np_) (NASD_ND_PARTMASK & (_np_)->flags)

/*
 * Actual data blocks are numbered 1-based
 *
 * Helpers relating a block number (_blk_) to a refcount block index
 * (_bn_), at NASD_OD_REFS_PER_BLOCK entries per refcount block.
 * They rely on a nasd_odc_state pointer being in scope at the point
 * of use; it is not declared in this header.
 *
 *  REFBLK_CONV     _blk_ rebased against disk->npt_ext.first
 *  REFBLK_OF       index of the refcount block for _blk_
 *  OFF_IN_REFBLK   entry index of _blk_ within its refcount block
 *  REFBLK_FIRST    first block number covered by refcount block _bn_
 *  REFBLK_LAST     last block number covered by refcount block _bn_
 *
 * NOTE(review): REFBLK_OF rebases by npt_ext.first, whereas
 * OFF_IN_REFBLK, REFBLK_FIRST and REFBLK_LAST use a fixed base of 1.
 * The two agree only when npt_ext.first == 1 -- confirm that
 * invariant holds.
 */
#define NASD_ODC_REFBLK_CONV(_blk_)   ((_blk_)-nasd_odc_state->disk->npt_ext.first)
#define NASD_ODC_REFBLK_OF(_blk_)     (NASD_ODC_REFBLK_CONV(_blk_)/NASD_OD_REFS_PER_BLOCK)
#define NASD_ODC_OFF_IN_REFBLK(_blk_) (((_blk_)-1)%NASD_OD_REFS_PER_BLOCK)
#define NASD_ODC_REFBLK_FIRST(_bn_)   (((_bn_)*NASD_OD_REFS_PER_BLOCK)+1)
#define NASD_ODC_REFBLK_LAST(_bn_)    (((_bn_)+1)*NASD_OD_REFS_PER_BLOCK)

#endif /* !_NASD_OD_H_ */

/* Local Variables:  */
/* indent-tabs-mode: nil */
/* tab-width: 2 */
/* End: */
