vramestimator

package

v1.28.14 Latest Latest Go to latest Published: Feb 10, 2025 License: MIT Imports: 20 Imported by: 1

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/sammcj/gollama

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
Variables
func CalculateBPW(modelID string, memory float64, context int, kvCacheQuant KVCacheQuantisation, ...) (interface{}, error)
func CalculateContext(modelID string, memory, bpw float64, kvCacheQuant KVCacheQuantisation, ...) (int, error)
func CalculateVRAM(modelID string, bpw float64, context int, kvCacheQuant KVCacheQuantisation, ...) (float64, error)
func CalculateVRAMRaw(config ModelConfig, bpwValues BPWValues, context int, numGPUs int, gqa bool) float64
func DownloadFile(url, filePath string, headers map[string]string) error
func EstimateVRAM(modelIdentifier, apiURL string, fitsVRAM float64) error
func GetAvailableMemory() (float64, error)
func GetHuggingFaceToken() string
func GetSystemRAM() (float64, error)
func ParseBPW(bpw string) float64
func ParseBPWOrQuant(input string) (float64, error)
func ParseModelIdentifier(modelID string) (string, string, error)
func PrintFormattedTable(table QuantResultTable) string
type BPWValues
- func GetBPWValues(bpw float64, kvCacheQuant KVCacheQuantisation) BPWValues
type ContextVRAM
type KVCacheQuantisation
type ModelConfig
- func GetModelConfig(modelID string) (ModelConfig, error)
type OllamaModelInfo
- func FetchOllamaModelInfo(apiURL, modelName string) (*OllamaModelInfo, error)
type QuantResult
type QuantResultTable
- func GenerateQuantTable(modelID string, fitsVRAM float64, ollamaModelInfo *OllamaModelInfo, ...) (QuantResultTable, error)

Constants ¶

View Source

// CUDASize is the fixed size 500 * 1024 * 1024, i.e. 500 MiB expressed in bytes.
// NOTE(review): the name suggests a CUDA overhead allowance — confirm at the use sites.
const (
	CUDASize = 500 << 20 // 500 MB
)

Variables ¶

View Source

// EXL2Options contains the EXL2 quantisation options.
// No initial value is visible in this declaration; the slice is presumably
// filled in elsewhere in the package — confirm before relying on its contents.
var EXL2Options []float64

EXL2Options contains the EXL2 quantisation options

View Source

// GGUFMapping maps GGUF quantisation type names to their bits per weight.
// Entries are grouped by naming family; map iteration order is unspecified,
// so the grouping is purely for readers.
var GGUFMapping = map[string]float64{
	// 16 bits per weight, under both spellings.
	"F16":  16,
	"FP16": 16,

	// Q-series types, highest bits per weight first.
	"Q8_0":   8.5,
	"Q6_K":   6.59,
	"Q5_K_L": 5.75,
	"Q5_K_M": 5.69,
	"Q5_K_S": 5.54,
	"Q5_0":   5.54,
	"Q4_K_L": 4.9,
	"Q4_K_M": 4.85,
	"Q4_K_S": 4.58,
	"Q4_0":   4.55,
	"Q3_K_L": 4.27,
	"Q3_K_M": 3.91,
	"Q3_K_S": 3.5,
	"Q2_K":   3.35,

	// IQ-series types, highest bits per weight first.
	"IQ4_NL":  4.5,
	"IQ4_XS":  4.25,
	"IQ3_M":   3.7,
	"IQ3_S":   3.5,
	"IQ3_XS":  3.3,
	"IQ3_XXS": 3.06,
	"IQ2_M":   2.7,
	"IQ2_S":   2.5,
	"IQ2_XS":  2.31,
	"IQ2_XXS": 2.06,
	"IQ1_S":   1.56,

	// Short names; each carries the same value as a Q-series entry above.
	"Q8": 8.5,
	"Q6": 6.59,
	"Q5": 5.54,
	"Q4": 4.55,
	"Q3": 3.5,
	"Q2": 3.35,
}

GGUFMapping maps GGUF quantisation types to their corresponding bits per weight

Functions ¶

func CalculateBPW ¶

func CalculateBPW(modelID string, memory float64, context int, kvCacheQuant KVCacheQuantisation, quantType string, ollamaModelInfo *OllamaModelInfo) (interface{}, error)

CalculateBPW calculates the best BPW for a given memory and context constraint

func CalculateContext ¶

func CalculateContext(modelID string, memory, bpw float64, kvCacheQuant KVCacheQuantisation, ollamaModelInfo *OllamaModelInfo, topContext int) (int, error)

CalculateContext calculates the maximum context for a given memory constraint

func CalculateVRAM ¶

func CalculateVRAM(modelID string, bpw float64, context int, kvCacheQuant KVCacheQuantisation, ollamaModelInfo *OllamaModelInfo) (float64, error)

CalculateVRAM calculates the VRAM usage for a given model and configuration

func CalculateVRAMRaw ¶

func CalculateVRAMRaw(config ModelConfig, bpwValues BPWValues, context int, numGPUs int, gqa bool) float64

CalculateVRAMRaw calculates the raw VRAM usage

func DownloadFile ¶

func DownloadFile(url, filePath string, headers map[string]string) error

DownloadFile downloads a file from a URL and saves it to the specified path

func EstimateVRAM ¶ added in v1.26.0

func EstimateVRAM(modelIdentifier, apiURL string, fitsVRAM float64) error

func GetAvailableMemory ¶ added in v1.24.0

func GetAvailableMemory() (float64, error)

func GetHuggingFaceToken ¶ added in v1.27.9

func GetHuggingFaceToken() string

func GetSystemRAM ¶ added in v1.24.0

func GetSystemRAM() (float64, error)

func ParseBPW ¶

func ParseBPW(bpw string) float64

ParseBPW parses the BPW value

func ParseBPWOrQuant ¶

func ParseBPWOrQuant(input string) (float64, error)

ParseBPWOrQuant takes a string and returns a float64 BPW value

func ParseModelIdentifier ¶ added in v1.27.19

func ParseModelIdentifier(modelID string) (string, string, error)

ParseModelIdentifier parses a model identifier into its base name and quantisation level. Handles both HuggingFace (contains "/") and Ollama (contains ":" or neither) formats.

func PrintFormattedTable ¶ added in v1.24.0

func PrintFormattedTable(table QuantResultTable) string

PrintFormattedTable returns a formatted string representation of the given QuantResultTable

Types ¶

type BPWValues ¶

// BPWValues represents the bits per weight values for different components.
// NOTE(review): judging by the field names, the components are the model
// weights, the LM head, and the k/v cache — confirm against GetBPWValues.
type BPWValues struct {
	BPW, LMHeadBPW, KVCacheBPW float64
}

BPWValues represents the bits per weight values for different components

func GetBPWValues ¶

func GetBPWValues(bpw float64, kvCacheQuant KVCacheQuantisation) BPWValues

GetBPWValues calculates the BPW values based on the input

type ContextVRAM ¶ added in v1.24.0

// ContextVRAM bundles three float64 VRAM figures.
// NOTE(review): the field suffixes match the q8_0 and q4_0 k/v cache
// quantisation names, so these are presumably the same estimate under different
// cache quantisations; units are not visible here — confirm at the use sites.
type ContextVRAM struct {
	VRAM, VRAMQ8_0, VRAMQ4_0 float64
}

type KVCacheQuantisation ¶

// KVCacheQuantisation represents the quantisation type for the k/v context cache.
// It is a string type; the package declares the values "fp16", "q8_0" and "q4_0" for it.
type KVCacheQuantisation string

KVCacheQuantisation represents the quantisation type for the k/v context cache

// Values of KVCacheQuantisation declared by this package.
const (
	// KVCacheFP16 is the "fp16" k/v cache quantisation.
	KVCacheFP16 = KVCacheQuantisation("fp16")
	// KVCacheQ8_0 is the "q8_0" k/v cache quantisation.
	KVCacheQ8_0 = KVCacheQuantisation("q8_0")
	// KVCacheQ4_0 is the "q4_0" k/v cache quantisation.
	KVCacheQ4_0 = KVCacheQuantisation("q4_0")
)

type ModelConfig ¶

// ModelConfig represents the configuration of a model.
//
// Each field maps to the JSON key named in its struct tag.
// NOTE(review): apart from num_params, the keys look like those of a Hugging
// Face config.json — confirm against GetModelConfig.
type ModelConfig struct {
	NumParams             float64 `json:"num_params"`
	MaxPositionEmbeddings int     `json:"max_position_embeddings"`
	NumHiddenLayers       int     `json:"num_hidden_layers"`
	HiddenSize            int     `json:"hidden_size"`
	NumKeyValueHeads      int     `json:"num_key_value_heads"`
	NumAttentionHeads     int     `json:"num_attention_heads"`
	IntermediateSize      int     `json:"intermediate_size"`
	VocabSize             int     `json:"vocab_size"`
}

ModelConfig represents the configuration of a model

func GetModelConfig ¶

func GetModelConfig(modelID string) (ModelConfig, error)

GetModelConfig retrieves and parses the model configuration

type OllamaModelInfo ¶ added in v1.26.0

// OllamaModelInfo holds the parts of an Ollama model description that this
// package decodes: the "details" object and the free-form "model_info" object.
type OllamaModelInfo struct {
	// Details mirrors the "details" JSON object. QuantizationLevel is tagged
	// with the American spelling because that is the key Ollama emits
	// ("quantization_level"); encoding/json matches keys by name, so the
	// earlier "quantisation_level" tag left the field empty after decoding.
	Details struct {
		ParameterSize     string   `json:"parameter_size"`
		QuantizationLevel string   `json:"quantization_level"`
		Family            string   `json:"family"`
		Families          []string `json:"families"`
	} `json:"details"`
	// ModelInfo keeps the "model_info" object as generic JSON values.
	ModelInfo map[string]interface{} `json:"model_info"`
}

func FetchOllamaModelInfo ¶ added in v1.26.0

func FetchOllamaModelInfo(apiURL, modelName string) (*OllamaModelInfo, error)

type QuantResult ¶ added in v1.24.0

// QuantResult groups a quantisation type name, its BPW value, and a map of
// ContextVRAM entries keyed by int.
// NOTE(review): the map keys are presumably context lengths — confirm against
// GenerateQuantTable.
type QuantResult struct {
	QuantType string
	BPW       float64
	Contexts  map[int]ContextVRAM
}

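QuantResult holds the result for a single quantisation type: its name, its bits per weight, and a map of ContextVRAM values keyed by int (presumably the context length)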

type QuantResultTable ¶ added in v1.24.0

// QuantResultTable represents a table of VRAM estimation results.
type QuantResultTable struct {
	// ModelID is the model identifier string stored with the results.
	ModelID  string
	// Results holds the table's QuantResult entries.
	Results  []QuantResult
	// NOTE(review): FitsVRAM is presumably the VRAM budget that results are
	// compared against; its units are not visible here — confirm at the use sites.
	FitsVRAM float64
}

QuantResultTable represents a table of VRAM estimation results

func GenerateQuantTable ¶ added in v1.24.0

func GenerateQuantTable(modelID string, fitsVRAM float64, ollamaModelInfo *OllamaModelInfo, topContext int) (QuantResultTable, error)

Source Files ¶

View all Source files

vramestimator.go

Directories ¶

Path	Synopsis
cuda

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL