/*
 * nasd_generic_sock.c
 *
 * UNIX userlevel sockets for SRPC
 *
 * Author: Jim Zelenka
 */
/*
 * Copyright (c) of Carnegie Mellon University, 1999.
 *
 * Permission to reproduce, use, and prepare derivative works of
 * this software for internal use is granted provided the copyright
 * and "No Warranty" statements are included with all reproductions
 * and derivative works. This software may also be redistributed
 * without charge provided that the copyright and "No Warranty"
 * statements are included in all redistributions.
 *
 * NO WARRANTY. THIS SOFTWARE IS FURNISHED ON AN "AS IS" BASIS.
 * CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY KIND, EITHER
 * EXPRESSED OR IMPLIED AS TO THE MATTER INCLUDING, BUT NOT LIMITED
 * TO: WARRANTY OF FITNESS FOR PURPOSE OR MERCHANTABILITY, EXCLUSIVITY
 * OF RESULTS OR RESULTS OBTAINED FROM USE OF THIS SOFTWARE. CARNEGIE
 * MELLON UNIVERSITY DOES NOT MAKE ANY WARRANTY OF ANY KIND WITH RESPECT
 * TO FREEDOM FROM PATENT, TRADEMARK, OR COPYRIGHT INFRINGEMENT.
 */


#include <nasd/nasd_options.h>
#include <nasd/nasd_threadstuff.h>
#include <nasd/nasd_shutdown.h>
#include <nasd/nasd_types.h>
#include <nasd/nasd_srpc.h>
#include <nasd/nasd_common.h>

#include <stdio.h>
#include <string.h>
#include <signal.h>
#include <netdb.h>
#include <sys/ioctl.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <errno.h>
#include <time.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/uio.h>
#include <arpa/inet.h>
#include <netinet/tcp.h>

/*
 * Build-time switch, defaulting to 1 unless already defined.
 * When greater than zero, nasd_srpc_sys_sock_set_options() puts the
 * socket in non-blocking mode and nasd_srpc_sys_sock_recv() toggles
 * the blocking mode to match the caller's NASD_SRPC_RECV_NOWAIT flag.
 * When zero, nasd_srpc_sys_sock_setblock() panics if asked for
 * non-blocking mode and the recv path polls with FIONREAD instead.
 */
#ifndef NASD_SRPC_USE_NONBLOCKING
#define NASD_SRPC_USE_NONBLOCKING 1
#endif /* !NASD_SRPC_USE_NONBLOCKING */

/*
 * Build-time switch, defaulting to 1 unless already defined.
 * When greater than zero, do_read() is implemented with recvmsg();
 * otherwise it is a plain read().
 */
#ifndef NASD_SRPC_USE_RECVFROM
#define NASD_SRPC_USE_RECVFROM    1
#endif /* !NASD_SRPC_USE_RECVFROM */

/*
 * On dux, UIO_MAXIOV is more than we can fit on the stack (1024).
 * So we'll place a general limit.
 *
 * NASD_GS_MAX_IOV is the number of entries in the on-stack iovec
 * array used by nasd_srpc_sys_sock_send(). It is the smaller of
 * NASD_UIO_MAXIOV and NASD_GS_MAX_MAX_IOV.
 *
 * NOTE(review): NASD_UIO_MAXIOV is not defined in this file;
 * presumably it is supplied by one of the nasd headers -- confirm.
 */
#define NASD_GS_MAX_MAX_IOV 16

#if NASD_UIO_MAXIOV > NASD_GS_MAX_MAX_IOV
#define NASD_GS_MAX_IOV NASD_GS_MAX_MAX_IOV
#else /* NASD_UIO_MAXIOV > NASD_GS_MAX_MAX_IOV */
#define NASD_GS_MAX_IOV NASD_UIO_MAXIOV
#endif /* NASD_UIO_MAXIOV > NASD_GS_MAX_MAX_IOV */

/*
 * do_read(_ret_, _sock_, _buf_, _len_)
 *
 * Perform a single read of up to _len_ bytes from the descriptor
 * (_sock_)->sock.fd into _buf_, storing the system call's return
 * value in _ret_. Depending on NASD_SRPC_USE_RECVFROM this is either
 * one recvmsg() with a single-entry iovec or one read().
 *
 * Both variants are wrapped in do { } while (0) so the macro expands
 * to exactly one statement and is safe in unbraced if/else bodies.
 * Arguments are parenthesized so expression arguments expand safely.
 */
#if NASD_SRPC_USE_RECVFROM > 0
#define do_read(_ret_,_sock_,_buf_,_len_) do { \
  struct msghdr _msg; \
  struct iovec _iov; \
\
  _iov.iov_base = (_buf_); \
  _iov.iov_len = (_len_); \
  _msg.msg_name = NULL; \
  _msg.msg_namelen = 0; \
  _msg.msg_iov = &_iov; \
  _msg.msg_iovlen = 1; \
  _msg.msg_control = NULL; \
  _msg.msg_controllen = 0; \
  _msg.msg_flags = 0; \
\
  (_ret_) = recvmsg((_sock_)->sock.fd, &_msg, 0); \
} while (0)
#else /* NASD_SRPC_USE_RECVFROM > 0 */
#define do_read(_ret_,_sock_,_buf_,_len_) do { \
  (_ret_) = read((_sock_)->sock.fd, (_buf_), (_len_)); \
} while (0)
#endif /* NASD_SRPC_USE_RECVFROM > 0 */


/*
 * Query how many bytes are waiting to be read on the socket.
 *
 * sock      socket whose descriptor (sock->sock.fd) is queried
 * pendingp  receives the byte count reported by the FIONREAD ioctl
 *
 * Returns NASD_SUCCESS when the ioctl returns zero, NASD_FAIL otherwise.
 */
NASD_INLINE
nasd_status_t
nasd_srpc_sys_sock_bytes_pending(
  nasd_srpc_sock_t  *sock,
  int               *pendingp)
{
  if (ioctl(sock->sock.fd, FIONREAD, pendingp) != 0) {
    return(NASD_FAIL);
  }

  return(NASD_SUCCESS);
}

/*
 * Put the socket into blocking or non-blocking mode.
 *
 * sock      socket to adjust
 * blocking  nonzero selects blocking mode, zero selects non-blocking
 *
 * When NASD_SRPC_USE_NONBLOCKING is 0, asking for non-blocking mode
 * is a programming error and panics.
 *
 * Returns NASD_SUCCESS and records the new mode in
 * sock->sock.isblocking. If the FIONBIO ioctl fails, the socket is
 * recorded as blocking and NASD_SRPC_NOT_ALL_OPTIONS_SET is returned.
 */
nasd_status_t
nasd_srpc_sys_sock_setblock(
  nasd_srpc_sock_t           *sock,
  int                         blocking)
{
  int nonblock;

#if NASD_SRPC_USE_NONBLOCKING == 0
  if (!blocking) {
    NASD_PANIC();
  }
#endif /* NASD_SRPC_USE_NONBLOCKING == 0 */

  /* FIONBIO takes the inverse sense: nonzero means non-blocking */
  nonblock = (blocking == 0);

  if (ioctl(sock->sock.fd, FIONBIO, &nonblock) != 0) {
    sock->sock.isblocking = 1;
    return(NASD_SRPC_NOT_ALL_OPTIONS_SET);
  }

  sock->sock.isblocking = blocking;
  return(NASD_SUCCESS);
}

/*
 * Set the TCP_NODELAY option on the socket.
 *
 * sock     socket to adjust
 * nodelay  value handed to setsockopt() for TCP_NODELAY
 *
 * Returns NASD_SUCCESS and caches the value in sock->sock.nodelay,
 * or NASD_FAIL (cache untouched) if setsockopt() fails.
 */
nasd_status_t
nasd_srpc_sys_sock_setnodelay(
  nasd_srpc_sock_t           *sock,
  int                         nodelay)
{
  int flag = nodelay;

  if (setsockopt(sock->sock.fd, IPPROTO_TCP, TCP_NODELAY,
        (char *)&flag, sizeof(flag)) != 0)
  {
    return(NASD_FAIL);
  }

  sock->sock.nodelay = nodelay;
  return(NASD_SUCCESS);
}

/*
 * Apply a single integer-valued socket option.
 * Returns whatever setsockopt() returns (zero on success).
 */
static int
nasd_srpc_sys_sock_set_int_opt(
  int  fd,
  int  level,
  int  optname,
  int  value)
{
  return(setsockopt(fd, level, optname, (char *)&value, sizeof(value)));
}

/*
 * Apply a set of buffer/behavior options to a connected socket.
 *
 * sock  socket to configure; its descriptor must be valid (asserted)
 * opts  requested settings. Each integer field is applied only when
 *       it is >= 0; negative values leave that option alone.
 *       nodelay_thresh is always copied into the socket.
 *
 * The blocking mode is chosen first according to
 * NASD_SRPC_USE_NONBLOCKING. The send/receive low-water marks and
 * SO_USELOOPBACK are compiled out when __linux__ is defined.
 *
 * Every option is attempted even if an earlier one fails. Returns
 * NASD_SUCCESS if everything was applied. If any setsockopt() fails
 * the result is NASD_SRPC_NOT_ALL_OPTIONS_SET; otherwise it is
 * whatever nasd_srpc_sys_sock_setblock() returned.
 */
nasd_status_t
nasd_srpc_sys_sock_set_options(
  nasd_srpc_sock_t          *sock,
  nasd_srpc_sockbuf_opts_t  *opts)
{
  nasd_status_t status;
  int fd;

  NASD_ASSERT(sock->sock.fd >= 0);
  fd = sock->sock.fd;

  sock->sock.nodelay_thresh = opts->nodelay_thresh;

#if NASD_SRPC_USE_NONBLOCKING > 0
  status = nasd_srpc_sys_sock_setblock(sock, 0);
#else /* NASD_SRPC_USE_NONBLOCKING > 0 */
  status = nasd_srpc_sys_sock_setblock(sock, 1);
#endif /* NASD_SRPC_USE_NONBLOCKING > 0 */

  if ((opts->sndbuf >= 0) &&
    nasd_srpc_sys_sock_set_int_opt(fd, SOL_SOCKET, SO_SNDBUF, opts->sndbuf))
  {
    status = NASD_SRPC_NOT_ALL_OPTIONS_SET;
  }

  if ((opts->rcvbuf >= 0) &&
    nasd_srpc_sys_sock_set_int_opt(fd, SOL_SOCKET, SO_RCVBUF, opts->rcvbuf))
  {
    status = NASD_SRPC_NOT_ALL_OPTIONS_SET;
  }

#ifndef __linux__
  if ((opts->sndlowat >= 0) &&
    nasd_srpc_sys_sock_set_int_opt(fd, SOL_SOCKET, SO_SNDLOWAT,
      opts->sndlowat))
  {
    status = NASD_SRPC_NOT_ALL_OPTIONS_SET;
  }

  if ((opts->rcvlowat >= 0) &&
    nasd_srpc_sys_sock_set_int_opt(fd, SOL_SOCKET, SO_RCVLOWAT,
      opts->rcvlowat))
  {
    status = NASD_SRPC_NOT_ALL_OPTIONS_SET;
  }
#endif /* !__linux__ */

  if (opts->nodelay >= 0) {
    if (nasd_srpc_sys_sock_set_int_opt(fd, IPPROTO_TCP, TCP_NODELAY,
          opts->nodelay))
    {
      status = NASD_SRPC_NOT_ALL_OPTIONS_SET;
    }
    else {
      /* keep the cached flag in step with the kernel's setting */
      sock->sock.nodelay = opts->nodelay;
    }
  }

  if ((opts->keepalive >= 0) &&
    nasd_srpc_sys_sock_set_int_opt(fd, SOL_SOCKET, SO_KEEPALIVE,
      opts->keepalive))
  {
    status = NASD_SRPC_NOT_ALL_OPTIONS_SET;
  }

  if ((opts->reuseaddr >= 0) &&
    nasd_srpc_sys_sock_set_int_opt(fd, SOL_SOCKET, SO_REUSEADDR,
      opts->reuseaddr))
  {
    status = NASD_SRPC_NOT_ALL_OPTIONS_SET;
  }

#ifndef __linux__
  if ((opts->loopback >= 0) &&
    nasd_srpc_sys_sock_set_int_opt(fd, SOL_SOCKET, SO_USELOOPBACK,
      opts->loopback))
  {
    status = NASD_SRPC_NOT_ALL_OPTIONS_SET;
  }
#endif /* !__linux__ */

  return(status);
}

/*
 * Per-socket system-level initialization.
 *
 * The only work done here is to set the process's SIGPIPE disposition
 * to "ignore"; the sock argument itself is not examined.
 *
 * Always returns NASD_SUCCESS.
 */
nasd_status_t
nasd_srpc_sys_sock_init(
  nasd_srpc_sock_t  *sock)
{
  (void)sock;

  (void)signal(SIGPIPE, SIG_IGN);

  return(NASD_SUCCESS);
}

/*
 * Open a TCP connection to ipaddr:ipport and record it in sock.
 *
 * sock    socket object; sock->sock.fd receives the new descriptor
 * ipaddr  IPv4 address, converted with nasd_hton32() before use
 * ipport  TCP port, converted with nasd_hton16() before use
 *
 * After connecting, the kernel's current TCP_NODELAY setting is read
 * back and cached in sock->sock.nodelay.
 *
 * Returns NASD_SUCCESS on success. Returns NASD_FAIL if the socket
 * cannot be created. Returns NASD_SRPC_CANNOT_CONNECT if connect()
 * or the TCP_NODELAY query fails; in that case the descriptor is
 * closed and sock->sock.fd is reset to -1.
 */
nasd_status_t
nasd_srpc_sys_sock_conn(
  nasd_srpc_sock_t  *sock,
  nasd_uint32        ipaddr,
  nasd_uint16        ipport)
{
  struct sockaddr_in server;
  socklen_t val_len;
  int ret, val;

  /* clear the whole address, not just sin_zero */
  memset(&server, 0, sizeof(server));
  server.sin_family = AF_INET;
  server.sin_addr.s_addr = nasd_hton32(ipaddr);
  server.sin_port = nasd_hton16(ipport);

  /* protocol 0 selects the default stream protocol for AF_INET */
  sock->sock.fd = socket(AF_INET, SOCK_STREAM, 0);
  if (sock->sock.fd < 0) {
    return(NASD_FAIL);
  }

  ret = connect(sock->sock.fd, (struct sockaddr *)&server, sizeof(server));
  if (ret < 0) {
    close(sock->sock.fd);
    sock->sock.fd = (-1);
    return(NASD_SRPC_CANNOT_CONNECT);
  }

  /* getsockopt() takes a socklen_t in/out length, not an int */
  val = 0;
  val_len = (socklen_t)sizeof(val);
  ret = getsockopt(sock->sock.fd, IPPROTO_TCP, TCP_NODELAY,
    (char *)&val, &val_len);
  if (ret || (val_len != (socklen_t)sizeof(val))) {
    close(sock->sock.fd);
    sock->sock.fd = (-1);
    return(NASD_SRPC_CANNOT_CONNECT);
  }
  sock->sock.nodelay = val;

  return(NASD_SUCCESS);
}

/*
 * Decide what to do after write()/writev() returned ret <= 0.
 *
 * A zero return or a would-block errno on a non-blocking socket is
 * handled by switching the socket to blocking mode so the caller can
 * retry. Anything else (including any stall on a socket that is
 * already blocking) is a failure.
 *
 * NOTE(review): EINTR is deliberately still treated as a failure, as
 * it was before; callers may depend on signals aborting a send.
 *
 * Returns nonzero if the caller should retry, zero if it should fail.
 */
static int
nasd_srpc_sys_sock_send_stalled(
  nasd_srpc_sock_t  *sock,
  int                ret,
  int                err)
{
  if ((ret != 0) && (err != EWOULDBLOCK) && (err != EAGAIN))
    return(0);
  if (sock->sock.isblocking)
    return(0);

  NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_send_chgblock);
  if (nasd_srpc_sys_sock_setblock(sock, 1))
    return(0);

  return(1);
}

/*
 * Push the first nio entries of iov (want bytes in total) out with
 * repeated writev() calls, coping with partial writes.
 *
 * The iov entries are modified in place as data is consumed. Every
 * byte written is added to *sent, including on the failure path, so
 * the caller always knows how much went out.
 *
 * Returns NASD_SUCCESS once all want bytes are written, NASD_FAIL on
 * an unrecoverable write error.
 */
static nasd_status_t
nasd_srpc_sys_sock_writev_all(
  nasd_srpc_sock_t  *sock,
  struct iovec      *iov,
  int                nio,
  int                want,
  int               *sent)
{
  int first, done, ret;
  char *base;

  first = 0;
  done = 0;
  while (done < want) {
    NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_writev_calls);
    ret = writev(sock->sock.fd, &iov[first], nio-first);
    if (ret <= 0) {
      if (nasd_srpc_sys_sock_send_stalled(sock, ret, errno))
        continue;
      return(NASD_FAIL);
    }
    *sent += ret;
    done += ret;
    if (done < want) {
      /* Partial write: skip the entries that went out completely... */
      for(;first<nio;first++) {
        if ((size_t)ret < iov[first].iov_len)
          break;
        ret -= (int)iov[first].iov_len;
      }
      NASD_ASSERT(first < nio);
      /* ...and trim the one that was only partly written. */
      if (ret) {
        base = (char *)iov[first].iov_base;
        iov[first].iov_base = &base[ret];
        iov[first].iov_len -= (size_t)ret;
      }
    }
  }

  return(NASD_SUCCESS);
}

/*
 * Send every byte described by a memvec list on the socket.
 *
 * sock        socket to write to; descriptor must be valid (asserted)
 * memvec      head of a singly linked list of (buf, len) pieces
 * total_len   sum of the piece lengths (asserted against what is sent)
 * bytes_sent  receives the number of bytes actually written, on both
 *             success and failure
 *
 * If sock->sock.nodelay_thresh is non-negative, TCP_NODELAY is first
 * switched off for messages of at least that many bytes and on for
 * smaller ones. A single-piece list is sent with write(); longer
 * lists are sent with writev() in batches of up to NASD_GS_MAX_IOV
 * pieces. If a non-blocking socket stalls, it is switched to blocking
 * mode and the write is retried.
 *
 * Returns NASD_SUCCESS when everything was written, NASD_MEM_LIST_ERR
 * if a piece has a negative length, the status from
 * nasd_srpc_sys_sock_setnodelay() if that fails, and NASD_FAIL for
 * write errors.
 */
nasd_status_t
nasd_srpc_sys_sock_send(
  nasd_srpc_sock_t    *sock,
  nasd_srpc_memvec_t  *memvec,
  int                  total_len,
  int                  *bytes_sent)
{
  struct iovec iov[NASD_GS_MAX_IOV];
  nasd_srpc_memvec_t *vec;
  int sent, ret, want, nio;
  nasd_status_t rc;
  char *tmp;

  sent = 0;
  nio = 0;
  want = 0;

  NASD_ASSERT(sock->sock.fd >= 0);
  NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_send_calls);

  /*
   * Match TCP_NODELAY to the message size: off at or above the
   * threshold, on below it. Only touch the socket when the cached
   * setting disagrees.
   */
  if (sock->sock.nodelay_thresh >= 0) {
    rc = NASD_SUCCESS;
    if (total_len >= sock->sock.nodelay_thresh) {
      if (sock->sock.nodelay)
        rc = nasd_srpc_sys_sock_setnodelay(sock, 0);
    }
    else {
      if (sock->sock.nodelay == 0)
        rc = nasd_srpc_sys_sock_setnodelay(sock, 1);
    }
    if (rc) {
      *bytes_sent = 0;
      NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_send_errs);
      return(rc);
    }
  }

  if (memvec->next == NULL) {
    /*
     * No scatter-gather optimization
     */
    NASD_ASSERT(memvec->len == total_len);
    tmp = (char *)memvec->buf;
    while (sent < memvec->len) {
      NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_write_calls);
      ret = write(sock->sock.fd, &tmp[sent], memvec->len-sent);
      if (ret <= 0) {
        if (nasd_srpc_sys_sock_send_stalled(sock, ret, errno))
          continue;
        *bytes_sent = sent;
        NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_send_errs);
        return(NASD_FAIL);
      }
      sent += ret;
    }
    *bytes_sent = sent;
    NASD_ASSERT((*bytes_sent) == total_len);
    return(NASD_SUCCESS);
  }

  /*
   * Use batches of writev() to go through
   * the memvec list.
   */
  for(vec=memvec;vec;vec=vec->next) {
    if (vec->len == 0)
      continue;
    if (vec->len < 0) {
      *bytes_sent = sent;
      NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_send_errs);
      return(NASD_MEM_LIST_ERR);
    }
    iov[nio].iov_base = vec->buf;
    iov[nio].iov_len = vec->len;
    want += vec->len;
    nio++;
    if (nio == NASD_GS_MAX_IOV) {
      /* batch is full: flush it before collecting more pieces */
      rc = nasd_srpc_sys_sock_writev_all(sock, iov, nio, want, &sent);
      if (rc) {
        *bytes_sent = sent;
        NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_send_errs);
        return(NASD_FAIL);
      }
      nio = 0;
      want = 0;
    }
    NASD_ASSERT(nio < NASD_GS_MAX_IOV);
  }

  /* flush whatever is left in the final, partially filled batch */
  rc = nasd_srpc_sys_sock_writev_all(sock, iov, nio, want, &sent);
  if (rc) {
    *bytes_sent = sent;
    NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_send_errs);
    return(NASD_FAIL);
  }

  *bytes_sent = sent;
  NASD_ASSERT((*bytes_sent) == total_len);
  return(NASD_SUCCESS);
}

/*
 * Receive data from the socket.
 *
 * sock            socket to read; descriptor must be valid (asserted)
 * buf             destination buffer
 * len             size of buf / number of bytes wanted
 * bytes_received  receives the number of bytes placed in buf
 * flags           bitmask of NASD_SRPC_RECV_NOWAIT and
 *                 NASD_SRPC_RECV_FILL
 *
 * NASD_SRPC_RECV_NOWAIT: do not wait for the first byte. With
 * NASD_SRPC_USE_NONBLOCKING > 0 the socket is switched to non-blocking
 * mode; otherwise FIONREAD is polled and the call returns at once
 * with zero bytes if nothing is pending. Without the flag, a
 * non-blocking socket is switched back to blocking first.
 *
 * NASD_SRPC_RECV_FILL: keep reading until len bytes have arrived.
 * Once at least one byte has been received, the socket is forced into
 * blocking mode so the rest of the buffer is waited for. Without the
 * flag, a single read is issued and whatever it returns is reported.
 *
 * A read that returns 0 is handled like a would-block error. On a
 * non-blocking socket with nothing received so far, that yields
 * NASD_SUCCESS with *bytes_received == 0.
 * NOTE(review): recvmsg()/read() returning 0 normally means the peer
 * closed the connection, which callers therefore cannot tell apart
 * from "no data yet" in that case -- confirm this is intended.
 *
 * Returns NASD_SUCCESS, NASD_FAIL on read errors, or the status from
 * a failed blocking-mode change / FIONREAD query.
 */
nasd_status_t
nasd_srpc_sys_sock_recv(
  nasd_srpc_sock_t  *sock,
  void              *buf,
  int                len,
  int               *bytes_received,
  int                flags)
{
  nasd_status_t rc;
  int ret, got;
  char *tmp;
#if !(NASD_SRPC_USE_NONBLOCKING > 0)
  int nb;   /* bytes pending, used only by the FIONREAD polling path */
#endif /* !(NASD_SRPC_USE_NONBLOCKING > 0) */

  NASD_ASSERT(sock->sock.fd >= 0);
  NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_recv_calls);

  if ((flags&NASD_SRPC_RECV_NOWAIT) && (sock->sock.isblocking)) {
#if NASD_SRPC_USE_NONBLOCKING > 0
    NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_recv_chgblock);
    rc = nasd_srpc_sys_sock_setblock(sock, 0);
    if (rc) {
      *bytes_received = 0;
      NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_recv_errs);
      return(rc);
    }
#else /* NASD_SRPC_USE_NONBLOCKING > 0 */
    nb = 0;
    rc = nasd_srpc_sys_sock_bytes_pending(sock, &nb);
    if (rc) {
      *bytes_received = 0;
      NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_recv_errs);
      return(rc);
    }
    if (nb == 0) {
      /* nothing waiting, and the caller asked us not to block */
      *bytes_received = 0;
      return(NASD_SUCCESS);
    }
#endif /* NASD_SRPC_USE_NONBLOCKING > 0 */
  }

#if NASD_SRPC_USE_NONBLOCKING > 0
  if ((!(flags&NASD_SRPC_RECV_NOWAIT)) && (sock->sock.isblocking == 0)) {
    NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_recv_chgblock);
    rc = nasd_srpc_sys_sock_setblock(sock, 1);
    if (rc) {
      *bytes_received = 0;
      NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_recv_errs);
      return(rc);
    }
  }
#endif /* NASD_SRPC_USE_NONBLOCKING > 0 */

  if (flags & NASD_SRPC_RECV_FILL) {
    tmp = (char *)buf;
    for(got=0;got<len;) {
      NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_read_calls);
      do_read(ret, sock, &tmp[got], len-got);
      if (ret <= 0) {
        *bytes_received = got;
        if ((ret == 0) || (errno == EWOULDBLOCK) || (errno == EAGAIN)) {
          if (sock->sock.isblocking) {
            NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_recv_errs);
            return(NASD_FAIL);
          }
          if (got) {
            /* already committed to this message: wait for the rest */
            NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_recv_chgblock);
            rc = nasd_srpc_sys_sock_setblock(sock, 1);
            if (rc) {
              NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_recv_errs);
              return(rc);
            }
            continue;
          }
          /* non-blocking and nothing has arrived yet */
          return(NASD_SUCCESS);
        }
        NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_recv_errs);
        return(NASD_FAIL);
      }
      got += ret;
      if ((got < len) && (sock->sock.isblocking == 0)) {
        /* got a partial buffer: block until the remainder shows up */
        NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_recv_chgblock);
        rc = nasd_srpc_sys_sock_setblock(sock, 1);
        if (rc) {
          *bytes_received = got;
          NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_recv_errs);
          return(NASD_FAIL);
        }
      }
    }
    NASD_ASSERT(got == len);
    *bytes_received = got;
    return(NASD_SUCCESS);
  }

  /*
   * Not filling: one read, report whatever it delivers.
   */
  NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_read_calls);
  do_read(ret, sock, buf, len);
  if (ret > 0) {
    *bytes_received = ret;
    return(NASD_SUCCESS);
  }

  *bytes_received = 0;
  if (((ret == 0) || (errno == EWOULDBLOCK) || (errno == EAGAIN)) &&
    (sock->sock.isblocking == 0))
  {
    /* non-blocking socket with nothing to read is not an error */
    return(NASD_SUCCESS);
  }

  NASD_SRPC_INC_COUNTER(&nasd_srpc_stats.sock_recv_errs);
  return(NASD_FAIL);
}

/*
 * Close the socket's descriptor.
 *
 * Returns NASD_SUCCESS and resets sock->sock.fd to -1 when close()
 * succeeds. If close() reports an error, NASD_FAIL is returned and
 * sock->sock.fd is left as it was.
 */
nasd_status_t
nasd_srpc_sys_sock_destroy(
  nasd_srpc_sock_t  *sock)
{
  if (close(sock->sock.fd) != 0) {
    return(NASD_FAIL);
  }

  sock->sock.fd = (-1);
  return(NASD_SUCCESS);
}

/* Local Variables:  */
/* indent-tabs-mode: nil */
/* tab-width: 2 */
/* End: */
