// Copyright 2018 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package procfs

// While implementing parsing of /proc/[pid]/mountstats, this blog was used
// heavily as a reference:
//   https://utcc.utoronto.ca/~cks/space/blog/linux/NFSMountstatsIndex
//
// Special thanks to Chris Siebenmann for all of his posts explaining the
// various statistics available for NFS.

import (
	"bufio"
	"fmt"
	"io"
	"strconv"
	"strings"
	"time"
)

// Constants shared between multiple functions.
//
// All "Len" values count whitespace-separated fields. For the statistics
// lines they count only the integer fields, i.e. after the leading label
// (and, for "xprt:" lines, after the protocol name) has been removed.
const (
	// Number of fields in a bare
	// "device [device] mounted on [mount] with fstype [type]" line. A device
	// line with more fields than this is treated as carrying a statistics
	// version field.
	deviceEntryLen = 8

	// Exact number of integer fields required on the "bytes:" and "events:"
	// lines, respectively.
	fieldBytesLen  = 8
	fieldEventsLen = 27

	// Statistics versions accepted when parsing "xprt:" lines.
	statVersion10 = "1.0"
	statVersion11 = "1.1"

	// Exact number of integer fields on an "xprt:" line for stat version 1.0.
	fieldTransport10TCPLen = 10
	fieldTransport10UDPLen = 7

	// Exact number of integer fields on an "xprt:" line for stat version 1.1.
	fieldTransport11TCPLen = 13
	fieldTransport11UDPLen = 10

	// Kernel version >= 4.14 MaxLen
	// See: https://elixir.bootlin.com/linux/v6.4.8/source/net/sunrpc/xprtrdma/xprt_rdma.h#L393
	fieldTransport11RDMAMaxLen = 28

	// Kernel version <= 4.2 MinLen
	// See: https://elixir.bootlin.com/linux/v4.2.8/source/net/sunrpc/xprtrdma/xprt_rdma.h#L331
	//
	// An rdma "xprt:" line must have at least this many integer fields.
	fieldTransport11RDMAMinLen = 20
)

// A Mount is a device mount parsed from /proc/[pid]/mountstats.
//
// Device, Mount and Type are taken from the second, fifth and eighth fields
// of a "device [device] mounted on [mount] with fstype [type]" line.
type Mount struct {
	// Name of the device.
	Device string
	// The mount point of the device.
	Mount string
	// The filesystem type used by the device.
	Type string
	// If available additional statistics related to this Mount.
	// Use a type assertion to determine if additional statistics are available.
	// Stats is left nil when the device line carries no statistics version
	// field.
	Stats MountStats
}

// A MountStats is a type which contains detailed statistics for a specific
// type of Mount.
type MountStats interface {
	// mountStats is an unexported marker method: because it cannot be named
	// outside this package, only types declared here can implement MountStats.
	mountStats()
}

// A MountStatsNFS is a MountStats implementation for NFSv3 and v4 mounts.
type MountStatsNFS struct {
	// The version of statistics provided, i.e. the text following "statvers="
	// on the device line.
	StatVersion string
	// The mount options of the NFS mount, split on "," from the "opts:" line.
	// "key=value" options are stored under key; any other option is stored
	// under its full text with an empty value.
	Opts map[string]string
	// The age of the NFS mount. The "age:" line is interpreted as seconds.
	Age time.Duration
	// Statistics related to byte counters for various operations.
	Bytes NFSBytesStats
	// Statistics related to various NFS event occurrences.
	Events NFSEventsStats
	// Statistics broken down by filesystem operation; one entry per line of
	// the "per-op statistics" section, in file order.
	Operations []NFSOperationStats
	// Statistics about the NFS RPC transport; one entry per "xprt:" line, in
	// file order.
	Transport []NFSTransportStats
}

// mountStats implements MountStats.
func ( MountStatsNFS) () {}

// A NFSBytesStats contains statistics about the number of bytes read and written
// by an NFS client to and from an NFS server.
//
// The fields are declared in the same order as the eight integers on the
// "bytes:" line.
type NFSBytesStats struct {
	// Number of bytes read using the read() syscall.
	Read uint64
	// Number of bytes written using the write() syscall.
	Write uint64
	// Number of bytes read using the read() syscall in O_DIRECT mode.
	DirectRead uint64
	// Number of bytes written using the write() syscall in O_DIRECT mode.
	DirectWrite uint64
	// Number of bytes read from the NFS server, in total.
	ReadTotal uint64
	// Number of bytes written to the NFS server, in total.
	WriteTotal uint64
	// Number of pages read directly via mmap()'d files.
	ReadPages uint64
	// Number of pages written directly via mmap()'d files.
	WritePages uint64
}

// A NFSEventsStats contains statistics about NFS event occurrences.
//
// The fields are declared in the same order as the 27 integers on the
// "events:" line.
type NFSEventsStats struct {
	// Number of times cached inode attributes are re-validated from the server.
	InodeRevalidate uint64
	// Number of times cached dentry nodes are re-validated from the server.
	DnodeRevalidate uint64
	// Number of times an inode cache is cleared.
	DataInvalidate uint64
	// Number of times cached inode attributes are invalidated.
	AttributeInvalidate uint64
	// Number of times files or directories have been open()'d.
	VFSOpen uint64
	// Number of times a directory lookup has occurred.
	VFSLookup uint64
	// Number of times permissions have been checked.
	VFSAccess uint64
	// Number of updates (and potential writes) to pages.
	VFSUpdatePage uint64
	// Number of pages read directly via mmap()'d files.
	VFSReadPage uint64
	// Number of times a group of pages have been read.
	VFSReadPages uint64
	// Number of pages written directly via mmap()'d files.
	VFSWritePage uint64
	// Number of times a group of pages have been written.
	VFSWritePages uint64
	// Number of times directory entries have been read with getdents().
	VFSGetdents uint64
	// Number of times attributes have been set on inodes.
	VFSSetattr uint64
	// Number of pending writes that have been forcefully flushed to the server.
	VFSFlush uint64
	// Number of times fsync() has been called on directories and files.
	VFSFsync uint64
	// Number of times locking has been attempted on a file.
	VFSLock uint64
	// Number of times files have been closed and released.
	VFSFileRelease uint64
	// Unknown.  Possibly unused.
	CongestionWait uint64
	// Number of times files have been truncated.
	Truncation uint64
	// Number of times a file has been grown due to writes beyond its existing end.
	WriteExtension uint64
	// Number of times a file was removed while still open by another process.
	SillyRename uint64
	// Number of times the NFS server gave less data than expected while reading.
	ShortRead uint64
	// Number of times the NFS server wrote less data than expected while writing.
	ShortWrite uint64
	// Number of times the NFS server indicated EJUKEBOX; retrieving data from
	// offline storage.
	JukeboxDelay uint64
	// Number of NFS v4.1+ pNFS reads.
	PNFSRead uint64
	// Number of NFS v4.1+ pNFS writes.
	PNFSWrite uint64
}

// A NFSOperationStats contains statistics for a single operation, i.e. one
// line of the "per-op statistics" section.
type NFSOperationStats struct {
	// The name of the operation, with the trailing ":" removed.
	Operation string
	// Number of requests performed for this operation.
	Requests uint64
	// Number of times an actual RPC request has been transmitted for this operation.
	Transmissions uint64
	// Number of times a request has had a major timeout.
	MajorTimeouts uint64
	// Number of bytes sent for this operation, including RPC headers and payload.
	BytesSent uint64
	// Number of bytes received for this operation, including RPC headers and payload.
	BytesReceived uint64
	// Duration all requests spent queued for transmission before they were sent.
	CumulativeQueueMilliseconds uint64
	// Duration it took to get a reply back after the request was transmitted.
	CumulativeTotalResponseMilliseconds uint64
	// Duration from when a request was enqueued to when it was completely handled.
	CumulativeTotalRequestMilliseconds uint64
	// The count of operations that complete with tk_status < 0.  These statuses usually indicate error conditions.
	// Only filled in when the line carries a ninth counter; zero otherwise.
	Errors uint64
}

// A NFSTransportStats contains statistics for the NFS mount RPC requests and
// responses. It holds the contents of one "xprt:" line.
//
// Which fields are populated depends on the protocol and statistics version;
// fields that a given line does not provide are left at zero.
type NFSTransportStats struct {
	// The transport protocol used for the NFS mount ("tcp", "udp" or "rdma").
	Protocol string
	// The local port used for the NFS mount.
	Port uint64
	// Number of times the client has had to establish a connection from scratch
	// to the NFS server.
	Bind uint64
	// Number of times the client has made a TCP connection to the NFS server.
	// Always zero for udp, which does not report it.
	Connect uint64
	// Duration (in jiffies, a kernel internal unit of time) the NFS mount has
	// spent waiting for connections to the server to be established.
	// Always zero for udp, which does not report it.
	ConnectIdleTime uint64
	// Duration since the NFS mount last saw any RPC traffic.
	// Always zero for udp, which does not report it.
	IdleTimeSeconds uint64
	// Number of RPC requests for this mount sent to the NFS server.
	Sends uint64
	// Number of RPC responses for this mount received from the NFS server.
	Receives uint64
	// Number of times the NFS server sent a response with a transaction ID
	// unknown to this client.
	BadTransactionIDs uint64
	// A running counter, incremented on each request as the current difference
	// between sends and receives.
	CumulativeActiveRequests uint64
	// A running counter, incremented on each request by the current backlog
	// queue size.
	CumulativeBacklog uint64

	// Stats below only available with stat version 1.1.
	// They are left at zero for rdma transports.

	// Maximum number of simultaneously active RPC requests ever used.
	MaximumRPCSlotsUsed uint64
	// A running counter, incremented on each request as the current size of the
	// sending queue.
	CumulativeSendingQueue uint64
	// A running counter, incremented on each request as the current size of the
	// pending queue.
	CumulativePendingQueue uint64

	// Stats below only available with stat version 1.1.
	// Transport over RDMA
	//
	// They are filled, in declaration order, from the integers that follow
	// the first ten on an rdma "xprt:" line; trailing fields that the line
	// does not provide stay zero.

	// accessed when sending a call
	ReadChunkCount   uint64
	WriteChunkCount  uint64
	ReplyChunkCount  uint64
	TotalRdmaRequest uint64

	// rarely accessed error counters
	PullupCopyCount      uint64
	HardwayRegisterCount uint64
	FailedMarshalCount   uint64
	BadReplyCount        uint64
	MrsRecovered         uint64
	MrsOrphaned          uint64
	MrsAllocated         uint64
	EmptySendctxQ        uint64

	// accessed when receiving a reply
	TotalRdmaReply    uint64
	FixupCopyCount    uint64
	ReplyWaitsForSend uint64
	LocalInvNeeded    uint64
	NomsgCallCount    uint64
	BcallCount        uint64
}

// parseMountStats parses a /proc/[pid]/mountstats file and returns a slice
// of Mount structures containing detailed information about each mount.
// If available, statistics for each mount are parsed as well.
func parseMountStats( io.Reader) ([]*Mount, error) {
	const (
		            = "device"
		 = "statvers="

		 = "nfs"
		 = "nfs4"
	)

	var  []*Mount

	 := bufio.NewScanner()
	for .Scan() {
		// Only look for device entries in this function
		 := strings.Fields(string(.Bytes()))
		if len() == 0 || [0] !=  {
			continue
		}

		,  := parseMount()
		if  != nil {
			return nil, 
		}

		// Does this mount also possess statistics information?
		if len() > deviceEntryLen {
			// Only NFSv3 and v4 are supported for parsing statistics
			if .Type !=  && .Type !=  {
				return nil, fmt.Errorf("%w: Cannot parse MountStats for %q", ErrFileParse, .Type)
			}

			 := strings.TrimPrefix([8], )

			,  := parseMountStatsNFS(, )
			if  != nil {
				return nil, 
			}

			.Stats = 
		}

		 = append(, )
	}

	return , .Err()
}

// parseMount parses an entry in /proc/[pid]/mountstats in the format:
//
//	device [device] mounted on [mount] with fstype [type]
func parseMount( []string) (*Mount, error) {
	if len() < deviceEntryLen {
		return nil, fmt.Errorf("%w: Invalid device %q", ErrFileParse, )
	}

	// Check for specific words appearing at specific indices to ensure
	// the format is consistent with what we expect
	 := []struct {
		 int
		 string
	}{
		{: 0, : "device"},
		{: 2, : "mounted"},
		{: 3, : "on"},
		{: 5, : "with"},
		{: 6, : "fstype"},
	}

	for ,  := range  {
		if [.] != . {
			return nil, fmt.Errorf("%w: Invalid device %q", ErrFileParse, )
		}
	}

	return &Mount{
		Device: [1],
		Mount:  [4],
		Type:   [7],
	}, nil
}

// parseMountStatsNFS parses a MountStatsNFS by scanning additional information
// related to NFS statistics.
func parseMountStatsNFS( *bufio.Scanner,  string) (*MountStatsNFS, error) {
	// Field indicators for parsing specific types of data
	const (
		       = "opts:"
		        = "age:"
		      = "bytes:"
		     = "events:"
		 = "per-op"
		  = "xprt:"
	)

	 := &MountStatsNFS{
		StatVersion: ,
	}

	for .Scan() {
		 := strings.Fields(string(.Bytes()))
		if len() == 0 {
			break
		}

		switch [0] {
		case :
			if len() < 2 {
				return nil, fmt.Errorf("%w: Incomplete information for NFS stats: %v", ErrFileParse, )
			}
			if .Opts == nil {
				.Opts = map[string]string{}
			}
			for ,  := range strings.Split([1], ",") {
				 := strings.Split(, "=")
				if len() == 2 {
					.Opts[[0]] = [1]
				} else {
					.Opts[] = ""
				}
			}
		case :
			if len() < 2 {
				return nil, fmt.Errorf("%w: Incomplete information for NFS stats: %v", ErrFileParse, )
			}
			// Age integer is in seconds
			,  := time.ParseDuration([1] + "s")
			if  != nil {
				return nil, 
			}

			.Age = 
		case :
			if len() < 2 {
				return nil, fmt.Errorf("%w: Incomplete information for NFS stats: %v", ErrFileParse, )
			}
			,  := parseNFSBytesStats([1:])
			if  != nil {
				return nil, 
			}

			.Bytes = *
		case :
			if len() < 2 {
				return nil, fmt.Errorf("%w: Incomplete information for NFS events: %v", ErrFileParse, )
			}
			,  := parseNFSEventsStats([1:])
			if  != nil {
				return nil, 
			}

			.Events = *
		case :
			if len() < 3 {
				return nil, fmt.Errorf("%w: Incomplete information for NFS transport stats: %v", ErrFileParse, )
			}

			,  := parseNFSTransportStats([1:], )
			if  != nil {
				return nil, 
			}

			.Transport = append(.Transport, *)
		}

		// When encountering "per-operation statistics", we must break this
		// loop and parse them separately to ensure we can terminate parsing
		// before reaching another device entry; hence why this 'if' statement
		// is not just another switch case
		if [0] ==  {
			break
		}
	}

	if  := .Err();  != nil {
		return nil, 
	}

	// NFS per-operation stats appear last before the next device entry
	,  := parseNFSOperationStats()
	if  != nil {
		return nil, 
	}

	.Operations = 

	return , nil
}

// parseNFSBytesStats parses a NFSBytesStats line using an input set of
// integer fields.
func parseNFSBytesStats( []string) (*NFSBytesStats, error) {
	if len() != fieldBytesLen {
		return nil, fmt.Errorf("%w: Invalid NFS bytes stats: %v", ErrFileParse, )
	}

	 := make([]uint64, 0, fieldBytesLen)
	for ,  := range  {
		,  := strconv.ParseUint(, 10, 64)
		if  != nil {
			return nil, 
		}

		 = append(, )
	}

	return &NFSBytesStats{
		Read:        [0],
		Write:       [1],
		DirectRead:  [2],
		DirectWrite: [3],
		ReadTotal:   [4],
		WriteTotal:  [5],
		ReadPages:   [6],
		WritePages:  [7],
	}, nil
}

// parseNFSEventsStats parses a NFSEventsStats line using an input set of
// integer fields.
func parseNFSEventsStats( []string) (*NFSEventsStats, error) {
	if len() != fieldEventsLen {
		return nil, fmt.Errorf("%w: invalid NFS events stats: %v", ErrFileParse, )
	}

	 := make([]uint64, 0, fieldEventsLen)
	for ,  := range  {
		,  := strconv.ParseUint(, 10, 64)
		if  != nil {
			return nil, 
		}

		 = append(, )
	}

	return &NFSEventsStats{
		InodeRevalidate:     [0],
		DnodeRevalidate:     [1],
		DataInvalidate:      [2],
		AttributeInvalidate: [3],
		VFSOpen:             [4],
		VFSLookup:           [5],
		VFSAccess:           [6],
		VFSUpdatePage:       [7],
		VFSReadPage:         [8],
		VFSReadPages:        [9],
		VFSWritePage:        [10],
		VFSWritePages:       [11],
		VFSGetdents:         [12],
		VFSSetattr:          [13],
		VFSFlush:            [14],
		VFSFsync:            [15],
		VFSLock:             [16],
		VFSFileRelease:      [17],
		CongestionWait:      [18],
		Truncation:          [19],
		WriteExtension:      [20],
		SillyRename:         [21],
		ShortRead:           [22],
		ShortWrite:          [23],
		JukeboxDelay:        [24],
		PNFSRead:            [25],
		PNFSWrite:           [26],
	}, nil
}

// parseNFSOperationStats parses a slice of NFSOperationStats by scanning
// additional information about per-operation statistics until an empty
// line is reached.
func parseNFSOperationStats( *bufio.Scanner) ([]NFSOperationStats, error) {
	const (
		// Minimum number of expected fields in each per-operation statistics set
		 = 9
	)

	var  []NFSOperationStats

	for .Scan() {
		 := strings.Fields(string(.Bytes()))
		if len() == 0 {
			// Must break when reading a blank line after per-operation stats to
			// enable top-level function to parse the next device entry
			break
		}

		if len() <  {
			return nil, fmt.Errorf("%w: invalid NFS per-operations stats: %v", ErrFileParse, )
		}

		// Skip string operation name for integers
		 := make([]uint64, 0, -1)
		for ,  := range [1:] {
			,  := strconv.ParseUint(, 10, 64)
			if  != nil {
				return nil, 
			}

			 = append(, )
		}
		 := NFSOperationStats{
			Operation:                           strings.TrimSuffix([0], ":"),
			Requests:                            [0],
			Transmissions:                       [1],
			MajorTimeouts:                       [2],
			BytesSent:                           [3],
			BytesReceived:                       [4],
			CumulativeQueueMilliseconds:         [5],
			CumulativeTotalResponseMilliseconds: [6],
			CumulativeTotalRequestMilliseconds:  [7],
		}

		if len() > 8 {
			.Errors = [8]
		}

		 = append(, )
	}

	return , .Err()
}

// parseNFSTransportStats parses a NFSTransportStats line using an input set of
// integer fields matched to a specific stats version.
func parseNFSTransportStats( []string,  string) (*NFSTransportStats, error) {
	// Extract the protocol field. It is the only string value in the line
	 := [0]
	 = [1:]

	switch  {
	case statVersion10:
		var  int
		switch  {
		case "tcp":
			 = fieldTransport10TCPLen
		case "udp":
			 = fieldTransport10UDPLen
		default:
			return nil, fmt.Errorf("%w: Invalid NFS protocol \"%s\" in stats 1.0 statement: %v", ErrFileParse, , )
		}
		if len() !=  {
			return nil, fmt.Errorf("%w: Invalid NFS transport stats 1.0 statement: %v", ErrFileParse, )
		}
	case statVersion11:
		var  int
		switch  {
		case "tcp":
			 = fieldTransport11TCPLen
		case "udp":
			 = fieldTransport11UDPLen
		case "rdma":
			 = fieldTransport11RDMAMinLen
		default:
			return nil, fmt.Errorf("%w: invalid NFS protocol \"%s\" in stats 1.1 statement: %v", ErrFileParse, , )
		}
		if (len() !=  && ( == "tcp" ||  == "udp")) ||
			( == "rdma" && len() < ) {
			return nil, fmt.Errorf("%w: invalid NFS transport stats 1.1 statement: %v, protocol: %v", ErrFileParse, , )
		}
	default:
		return nil, fmt.Errorf("%w: Unrecognized NFS transport stats version: %q, protocol: %v", ErrFileParse, , )
	}

	// Allocate enough for v1.1 stats since zero value for v1.1 stats will be okay
	// in a v1.0 response. Since the stat length is bigger for TCP stats, we use
	// the TCP length here.
	//
	// Note: slice length must be set to length of v1.1 stats to avoid a panic when
	// only v1.0 stats are present.
	// See: https://github.com/prometheus/node_exporter/issues/571.
	//
	// Note: NFS Over RDMA slice length is fieldTransport11RDMAMaxLen
	 := make([]uint64, fieldTransport11RDMAMaxLen+3)
	for ,  := range  {
		,  := strconv.ParseUint(, 10, 64)
		if  != nil {
			return nil, 
		}

		[] = 
	}

	// The fields differ depending on the transport protocol (TCP or UDP)
	// From https://utcc.utoronto.ca/%7Ecks/space/blog/linux/NFSMountstatsXprt
	//
	// For the udp RPC transport there is no connection count, connect idle time,
	// or idle time (fields #3, #4, and #5); all other fields are the same. So
	// we set them to 0 here.
	switch  {
	case "udp":
		 = append([:2], append(make([]uint64, 3), [2:]...)...)
	case "tcp":
		 = append([:fieldTransport11TCPLen], make([]uint64, fieldTransport11RDMAMaxLen-fieldTransport11TCPLen+3)...)
	case "rdma":
		 = append([:fieldTransport10TCPLen], append(make([]uint64, 3), [fieldTransport10TCPLen:]...)...)
	}

	return &NFSTransportStats{
		// NFS xprt over tcp or udp
		Protocol:                 ,
		Port:                     [0],
		Bind:                     [1],
		Connect:                  [2],
		ConnectIdleTime:          [3],
		IdleTimeSeconds:          [4],
		Sends:                    [5],
		Receives:                 [6],
		BadTransactionIDs:        [7],
		CumulativeActiveRequests: [8],
		CumulativeBacklog:        [9],

		// NFS xprt over tcp or udp
		// And statVersion 1.1
		MaximumRPCSlotsUsed:    [10],
		CumulativeSendingQueue: [11],
		CumulativePendingQueue: [12],

		// NFS xprt over rdma
		// And stat Version 1.1
		ReadChunkCount:       [13],
		WriteChunkCount:      [14],
		ReplyChunkCount:      [15],
		TotalRdmaRequest:     [16],
		PullupCopyCount:      [17],
		HardwayRegisterCount: [18],
		FailedMarshalCount:   [19],
		BadReplyCount:        [20],
		MrsRecovered:         [21],
		MrsOrphaned:          [22],
		MrsAllocated:         [23],
		EmptySendctxQ:        [24],
		TotalRdmaReply:       [25],
		FixupCopyCount:       [26],
		ReplyWaitsForSend:    [27],
		LocalInvNeeded:       [28],
		NomsgCallCount:       [29],
		BcallCount:           [30],
	}, nil
}