// Package cid implements the Content-IDentifiers specification
// (https://github.com/ipld/cid) in Go. CIDs are
// self-describing content-addressed identifiers useful for
// distributed information systems. CIDs are used in the IPFS
// (https://ipfs.io) project ecosystem.
//
// CIDs have two major versions. A CIDv0 corresponds to a multihash of type
// DagProtobuf, is deprecated and exists for compatibility reasons. Usually,
// CIDv1 should be used.
//
// A CIDv1 has four parts:
//
//	<cidv1> ::= <multibase-prefix><cid-version><multicodec-packed-content-type><multihash-content-address>
//
// As shown above, the CID implementation relies heavily on Multiformats,
// particularly Multibase
// (https://github.com/multiformats/go-multibase), Multicodec
// (https://github.com/multiformats/multicodec) and Multihash
// implementations (https://github.com/multiformats/go-multihash).
package cid

import (
	"bytes"
	"encoding"
	"encoding/binary"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"strings"

	mbase "github.com/multiformats/go-multibase"
	mh "github.com/multiformats/go-multihash"
	varint "github.com/multiformats/go-varint"
)

// UnsupportedVersionString just holds an error message
const UnsupportedVersionString = "<unsupported cid version>"

// ErrInvalidCid is an error that indicates that a CID is invalid.
// It wraps the underlying cause in Err.
type ErrInvalidCid struct {
	Err error
}

// Error implements the error interface; the message is the wrapped
// error prefixed with "invalid cid: ".
func (e ErrInvalidCid) Error() string {
	return fmt.Sprintf("invalid cid: %s", e.Err)
}

// Unwrap returns the wrapped error so that errors.Is / errors.As can
// inspect the cause.
func (e ErrInvalidCid) Unwrap() error {
	return e.Err
}

// Is reports whether err is an ErrInvalidCid (value or pointer),
// regardless of which error it wraps.
func (e ErrInvalidCid) Is(err error) bool {
	switch err.(type) {
	case ErrInvalidCid, *ErrInvalidCid:
		return true
	default:
		return false
	}
}

var (
	// ErrCidTooShort means that the cid passed to decode was not long
	// enough to be a valid Cid
	ErrCidTooShort = ErrInvalidCid{errors.New("cid too short")}

	// ErrInvalidEncoding means that selected encoding is not supported
	// by this Cid version
	ErrInvalidEncoding = errors.New("invalid base encoding")
)

// Consts below are DEPRECATED and left only for legacy reasons:
// <https://github.com/ipfs/go-cid/pull/137>
// Modern code should use consts from go-multicodec instead:
// <https://github.com/multiformats/go-multicodec>
const (
	// common ones
	Raw         = 0x55
	DagProtobuf = 0x70   // https://ipld.io/docs/codecs/known/dag-pb/
	DagCBOR     = 0x71   // https://ipld.io/docs/codecs/known/dag-cbor/
	DagJSON     = 0x0129 // https://ipld.io/docs/codecs/known/dag-json/
	Libp2pKey   = 0x72   // https://github.com/libp2p/specs/blob/master/peer-ids/peer-ids.md#peer-ids

	// other
	GitRaw                = 0x78
	DagJOSE               = 0x85 // https://ipld.io/specs/codecs/dag-jose/spec/
	EthBlock              = 0x90
	EthBlockList          = 0x91
	EthTxTrie             = 0x92
	EthTx                 = 0x93
	EthTxReceiptTrie      = 0x94
	EthTxReceipt          = 0x95
	EthStateTrie          = 0x96
	EthAccountSnapshot    = 0x97
	EthStorageTrie        = 0x98
	BitcoinBlock          = 0xb0
	BitcoinTx             = 0xb1
	ZcashBlock            = 0xc0
	ZcashTx               = 0xc1
	DecredBlock           = 0xe0
	DecredTx              = 0xe1
	DashBlock             = 0xf0
	DashTx                = 0xf1
	FilCommitmentUnsealed = 0xf101
	FilCommitmentSealed   = 0xf102
)

// tryNewCidV0 tries to convert a multihash into a CIDv0 CID and returns an
// error on failure.
func tryNewCidV0( mh.Multihash) (Cid, error) { // Need to make sure hash is valid for CidV0 otherwise we will // incorrectly detect it as CidV1 in the Version() method , := mh.Decode() if != nil { return Undef, ErrInvalidCid{} } if .Code != mh.SHA2_256 || .Length != 32 { return Undef, ErrInvalidCid{fmt.Errorf("invalid hash for cidv0 %d-%d", .Code, .Length)} } return Cid{string()}, nil } // NewCidV0 returns a Cid-wrapped multihash. // They exist to allow IPFS to work with Cids while keeping // compatibility with the plain-multihash format used used in IPFS. // NewCidV1 should be used preferentially. // // Panics if the multihash isn't sha2-256. func ( mh.Multihash) Cid { , := tryNewCidV0() if != nil { panic() } return } // NewCidV1 returns a new Cid using the given multicodec-packed // content type. // // Panics if the multihash is invalid. func ( uint64, mh.Multihash) Cid { := len() // Two 8 bytes (max) numbers plus hash. // We use strings.Builder to only allocate once. var strings.Builder .Grow(1 + varint.UvarintSize() + ) .WriteByte(1) var [binary.MaxVarintLen64]byte := varint.PutUvarint([:], ) .Write([:]) , := .Write() if != { panic("copy hash length is inconsistent") } return Cid{.String()} } var ( _ encoding.BinaryMarshaler = Cid{} _ encoding.BinaryUnmarshaler = (*Cid)(nil) _ encoding.TextMarshaler = Cid{} _ encoding.TextUnmarshaler = (*Cid)(nil) ) // Cid represents a self-describing content addressed // identifier. It is formed by a Version, a Codec (which indicates // a multicodec-packed content type) and a Multihash. type Cid struct{ str string } // Undef can be used to represent a nil or undefined Cid, using Cid{} // directly is also acceptable. var Undef = Cid{} // Defined returns true if a Cid is defined // Calling any other methods on an undefined Cid will result in // undefined behavior. func ( Cid) () bool { return .str != "" } // Parse is a short-hand function to perform Decode, Cast etc... on // a generic interface{} type. 
func ( interface{}) (Cid, error) { switch v2 := .(type) { case string: if strings.Contains(, "/ipfs/") { return Decode(strings.Split(, "/ipfs/")[1]) } return Decode() case []byte: return Cast() case mh.Multihash: return tryNewCidV0() case Cid: return , nil default: return Undef, ErrInvalidCid{fmt.Errorf("can't parse %+v as Cid", )} } } // MustParse calls Parse but will panic on error. func ( interface{}) Cid { , := Parse() if != nil { panic() } return } // Decode parses a Cid-encoded string and returns a Cid object. // For CidV1, a Cid-encoded string is primarily a multibase string: // // <multibase-type-code><base-encoded-string> // // The base-encoded string represents a: // // <version><codec-type><multihash> // // Decode will also detect and parse CidV0 strings. Strings // starting with "Qm" are considered CidV0 and treated directly // as B58-encoded multihashes. func ( string) (Cid, error) { if len() < 2 { return Undef, ErrCidTooShort } if len() == 46 && [:2] == "Qm" { , := mh.FromB58String() if != nil { return Undef, ErrInvalidCid{} } return tryNewCidV0() } , , := mbase.Decode() if != nil { return Undef, ErrInvalidCid{} } return Cast() } // Extract the encoding from a Cid. If Decode on the same string did // not return an error neither will this function. func ( string) (mbase.Encoding, error) { if len() < 2 { return -1, ErrCidTooShort } if len() == 46 && [:2] == "Qm" { return mbase.Base58BTC, nil } := mbase.Encoding([0]) // check encoding is valid , := mbase.NewEncoder() if != nil { return -1, ErrInvalidCid{} } return , nil } // Cast takes a Cid data slice, parses it and returns a Cid. // For CidV1, the data buffer is in the form: // // <version><codec-type><multihash> // // CidV0 are also supported. In particular, data buffers starting // with length 34 bytes, which starts with bytes [18,32...] are considered // binary multihashes. // // Please use decode when parsing a regular Cid string, as Cast does not // expect multibase-encoded data. 
Cast accepts the output of Cid.Bytes(). func ( []byte) (Cid, error) { , , := CidFromBytes() if != nil { return Undef, ErrInvalidCid{} } if != len() { return Undef, ErrInvalidCid{fmt.Errorf("trailing bytes in data buffer passed to cid Cast")} } return , nil } // UnmarshalBinary is equivalent to Cast(). It implements the // encoding.BinaryUnmarshaler interface. func ( *Cid) ( []byte) error { , := Cast() if != nil { return } .str = .str return nil } // UnmarshalText is equivalent to Decode(). It implements the // encoding.TextUnmarshaler interface. func ( *Cid) ( []byte) error { , := Decode(string()) if != nil { return } .str = .str return nil } // Version returns the Cid version. func ( Cid) () uint64 { if len(.str) == 34 && .str[0] == 18 && .str[1] == 32 { return 0 } return 1 } // Type returns the multicodec-packed content type of a Cid. func ( Cid) () uint64 { if .Version() == 0 { return DagProtobuf } , , := uvarint(.str) , , := uvarint(.str[:]) return } // String returns the default string representation of a // Cid. Currently, Base32 is used for CIDV1 as the encoding for the // multibase string, Base58 is used for CIDV0. func ( Cid) () string { switch .Version() { case 0: return .Hash().B58String() case 1: , := mbase.Encode(mbase.Base32, .Bytes()) if != nil { panic("should not error with hardcoded mbase: " + .Error()) } return default: panic("not possible to reach this point") } } // String returns the string representation of a Cid // encoded is selected base func ( Cid) ( mbase.Encoding) (string, error) { switch .Version() { case 0: if != mbase.Base58BTC { return "", ErrInvalidEncoding } return .Hash().B58String(), nil case 1: return mbase.Encode(, .Bytes()) default: panic("not possible to reach this point") } } // Encode return the string representation of a Cid in a given base // when applicable. Version 0 Cid's are always in Base58 as they do // not take a multibase prefix. 
func ( Cid) ( mbase.Encoder) string { switch .Version() { case 0: return .Hash().B58String() case 1: return .Encode(.Bytes()) default: panic("not possible to reach this point") } } // Hash returns the multihash contained by a Cid. func ( Cid) () mh.Multihash { := .Bytes() if .Version() == 0 { return mh.Multihash() } // skip version length , , := varint.FromUvarint() // skip codec length , , := varint.FromUvarint([:]) return mh.Multihash([+:]) } // Bytes returns the byte representation of a Cid. // The output of bytes can be parsed back into a Cid // with Cast(). // // If c.Defined() == false, it return a nil slice and may not // be parsable with Cast(). func ( Cid) () []byte { if !.Defined() { return nil } return []byte(.str) } // ByteLen returns the length of the CID in bytes. // It's equivalent to `len(c.Bytes())`, but works without an allocation, // and should therefore be preferred. // // (See also the WriteTo method for other important operations that work without allocation.) func ( Cid) () int { return len(.str) } // WriteBytes writes the CID bytes to the given writer. // This method works without incurring any allocation. // // (See also the ByteLen method for other important operations that work without allocation.) func ( Cid) ( io.Writer) (int, error) { , := io.WriteString(, .str) if != nil { return , } if != len(.str) { return , fmt.Errorf("failed to write entire cid string") } return , nil } // MarshalBinary is equivalent to Bytes(). It implements the // encoding.BinaryMarshaler interface. func ( Cid) () ([]byte, error) { return .Bytes(), nil } // MarshalText is equivalent to String(). It implements the // encoding.TextMarshaler interface. func ( Cid) () ([]byte, error) { return []byte(.String()), nil } // Equals checks that two Cids are the same. // In order for two Cids to be considered equal, the // Version, the Codec and the Multihash must match. func ( Cid) ( Cid) bool { return == } // UnmarshalJSON parses the JSON representation of a Cid. 
func ( *Cid) ( []byte) error { if len() < 2 { return ErrInvalidCid{fmt.Errorf("invalid cid json blob")} } := struct { string `json:"/"` }{} := & := json.Unmarshal(, &) if != nil { return ErrInvalidCid{} } if == nil { * = Cid{} return nil } if . == "" { return ErrInvalidCid{fmt.Errorf("cid was incorrectly formatted")} } , := Decode(.) if != nil { return ErrInvalidCid{} } * = return nil } // MarshalJSON procudes a JSON representation of a Cid, which looks as follows: // // { "/": "<cid-string>" } // // Note that this formatting comes from the IPLD specification // (https://github.com/ipld/specs/tree/master/ipld) func ( Cid) () ([]byte, error) { if !.Defined() { return []byte("null"), nil } return []byte(fmt.Sprintf("{\"/\":\"%s\"}", .String())), nil } // KeyString returns the binary representation of the Cid as a string func ( Cid) () string { return .str } // Loggable returns a Loggable (as defined by // https://godoc.org/github.com/ipfs/go-log). func ( Cid) () map[string]interface{} { return map[string]interface{}{ "cid": , } } // Prefix builds and returns a Prefix out of a Cid. func ( Cid) () Prefix { if .Version() == 0 { return Prefix{ MhType: mh.SHA2_256, MhLength: 32, Version: 0, Codec: DagProtobuf, } } := 0 , , := uvarint(.str[:]) += , , := uvarint(.str[:]) += , , := uvarint(.str[:]) += , , := uvarint(.str[:]) return Prefix{ MhType: , MhLength: int(), Version: , Codec: , } } // Prefix represents all the metadata of a Cid, // that is, the Version, the Codec, the Multihash type // and the Multihash length. It does not contains // any actual content information. // NOTE: The use -1 in MhLength to mean default length is deprecated, // // use the V0Builder or V1Builder structures instead type Prefix struct { Version uint64 Codec uint64 MhType uint64 MhLength int } // Sum uses the information in a prefix to perform a multihash.Sum() // and return a newly constructed Cid with the resulting multihash. 
func ( Prefix) ( []byte) (Cid, error) { := .MhLength if .MhType == mh.IDENTITY { = -1 } if .Version == 0 && (.MhType != mh.SHA2_256 || (.MhLength != 32 && .MhLength != -1)) { return Undef, ErrInvalidCid{fmt.Errorf("invalid v0 prefix")} } , := mh.Sum(, .MhType, ) if != nil { return Undef, ErrInvalidCid{} } switch .Version { case 0: return NewCidV0(), nil case 1: return NewCidV1(.Codec, ), nil default: return Undef, ErrInvalidCid{fmt.Errorf("invalid cid version")} } } // Bytes returns a byte representation of a Prefix. It looks like: // // <version><codec><mh-type><mh-length> func ( Prefix) () []byte { := varint.UvarintSize(.Version) += varint.UvarintSize(.Codec) += varint.UvarintSize(.MhType) += varint.UvarintSize(uint64(.MhLength)) := make([]byte, ) := varint.PutUvarint(, .Version) += varint.PutUvarint([:], .Codec) += varint.PutUvarint([:], .MhType) += varint.PutUvarint([:], uint64(.MhLength)) if != { panic("size mismatch") } return } // PrefixFromBytes parses a Prefix-byte representation onto a // Prefix. 
func ( []byte) (Prefix, error) { := bytes.NewReader() , := varint.ReadUvarint() if != nil { return Prefix{}, ErrInvalidCid{} } , := varint.ReadUvarint() if != nil { return Prefix{}, ErrInvalidCid{} } , := varint.ReadUvarint() if != nil { return Prefix{}, ErrInvalidCid{} } , := varint.ReadUvarint() if != nil { return Prefix{}, ErrInvalidCid{} } return Prefix{ Version: , Codec: , MhType: , MhLength: int(), }, nil } func ( []byte) (int, Cid, error) { if len() > 2 && [0] == mh.SHA2_256 && [1] == 32 { if len() < 34 { return 0, Undef, ErrInvalidCid{fmt.Errorf("not enough bytes for cid v0")} } , := mh.Cast([:34]) if != nil { return 0, Undef, ErrInvalidCid{} } return 34, Cid{string()}, nil } , , := varint.FromUvarint() if != nil { return 0, Undef, ErrInvalidCid{} } if != 1 { return 0, Undef, ErrInvalidCid{fmt.Errorf("expected 1 as the cid version number, got: %d", )} } , , := varint.FromUvarint([:]) if != nil { return 0, Undef, ErrInvalidCid{} } , , := mh.MHFromBytes([+:]) if != nil { return 0, Undef, ErrInvalidCid{} } := + + return , Cid{string([0:])}, nil } func toBufByteReader( io.Reader, []byte) *bufByteReader { // If the reader already implements ByteReader, use it directly. // Otherwise, use a fallback that does 1-byte Reads. if , := .(io.ByteReader); { return &bufByteReader{direct: , dst: } } return &bufByteReader{fallback: , dst: } } type bufByteReader struct { direct io.ByteReader fallback io.Reader dst []byte } func ( *bufByteReader) () (byte, error) { // The underlying reader has ReadByte; use it. if := .direct; != nil { , := .ReadByte() if != nil { return 0, } .dst = append(.dst, ) return , nil } // Fall back to a one-byte Read. // TODO: consider reading straight into dst, // once we have benchmarks and if they prove that to be faster. var [1]byte if , := io.ReadFull(.fallback, [:]); != nil { return 0, } .dst = append(.dst, [0]) return [0], nil } // CidFromReader reads a precise number of bytes for a CID from a given reader. 
// It returns the number of bytes read, the CID, and any error encountered. // The number of bytes read is accurate even if a non-nil error is returned. // // It's recommended to supply a reader that buffers and implements io.ByteReader, // as CidFromReader has to do many single-byte reads to decode varints. // If the argument only implements io.Reader, single-byte Read calls are used instead. // // If the Reader is found to yield zero bytes, an io.EOF error is returned directly, in all // other error cases, an ErrInvalidCid, wrapping the original error, is returned. func ( io.Reader) (int, Cid, error) { // 64 bytes is enough for any CIDv0, // and it's enough for most CIDv1s in practice. // If the digest is too long, we'll allocate more. := toBufByteReader(, make([]byte, 0, 64)) // We read the first varint, to tell if this is a CIDv0 or a CIDv1. // The varint package wants a io.ByteReader, so we must wrap our io.Reader. , := varint.ReadUvarint() if != nil { if == io.EOF { // First-byte read in ReadUvarint errors with io.EOF, so reader has no data. // Subsequent reads with an EOF will return io.ErrUnexpectedEOF and be wrapped here. return 0, Undef, } return len(.dst), Undef, ErrInvalidCid{} } // If we have a CIDv0, read the rest of the bytes and cast the buffer. if == mh.SHA2_256 { if , := io.ReadFull(, .dst[1:34]); != nil { return len(.dst) + , Undef, ErrInvalidCid{} } .dst = .dst[:34] , := mh.Cast(.dst) if != nil { return len(.dst), Undef, ErrInvalidCid{} } return len(.dst), Cid{string()}, nil } if != 1 { return len(.dst), Undef, ErrInvalidCid{fmt.Errorf("expected 1 as the cid version number, got: %d", )} } // CID block encoding multicodec. _, = varint.ReadUvarint() if != nil { return len(.dst), Undef, ErrInvalidCid{} } // We could replace most of the code below with go-multihash's ReadMultihash. // Note that it would save code, but prevent reusing buffers. // Plus, we already have a ByteReader now. := len(.dst) // Multihash hash function code. 
_, = varint.ReadUvarint() if != nil { return len(.dst), Undef, ErrInvalidCid{} } // Multihash digest length. , := varint.ReadUvarint() if != nil { return len(.dst), Undef, ErrInvalidCid{} } // Refuse to make large allocations to prevent OOMs due to bugs. const = 32 << 20 // 32MiB if > { return len(.dst), Undef, ErrInvalidCid{fmt.Errorf("refusing to allocate %d bytes for a digest", )} } // Fine to convert mhl to int, given maxDigestAlloc. := len(.dst) := + int() if > cap(.dst) { // If the multihash digest doesn't fit in our initial 64 bytes, // efficiently extend the slice via append+make. .dst = append(.dst, make([]byte, -len(.dst))...) } else { // The multihash digest fits inside our buffer, // so just extend its capacity. .dst = .dst[:] } if , := io.ReadFull(, .dst[:]); != nil { // We can't use len(br.dst) here, // as we've only read n bytes past prefixLength. return + , Undef, ErrInvalidCid{} } // This simply ensures the multihash is valid. // TODO: consider removing this bit, as it's probably redundant; // for now, it helps ensure consistency with CidFromBytes. _, _, = mh.MHFromBytes(.dst[:]) if != nil { return len(.dst), Undef, ErrInvalidCid{} } return len(.dst), Cid{string(.dst)}, nil }