Documentation
¶
Overview ¶
Package nested allows for additional calculations to be added to the output of a chutils.Input reader. The motivation is that there may be additional fields that need to be calculated from the source data before it is handed off to a chutils.Output writer.
The Reader defined here implements chutils.Input. Examples of usage:
- Adding a field based on which input fields passed validation.
- Adding additional fields calculated from the existing inputs.
- Adding additional fields based on other variables using a function closure.
- Modifying existing fields.
Index ¶
- type NewCalcFn
- type Reader
- func (rdr *Reader) Close() error
- func (rdr *Reader) CountLines() (numLines int, err error)
- func (rdr *Reader) Read(nTarget int, validate bool) (data []chutils.Row, valid []chutils.Valid, err error)
- func (rdr *Reader) Reset() error
- func (rdr *Reader) Seek(lineNo int) error
- func (rdr *Reader) TableSpec() *chutils.TableDef
Examples ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type NewCalcFn ¶
// NewCalcFn defines the signature of a function that calculates a new field.
// It is given the table definition ts, the current row data, the row's
// validation results valid, and the validate flag, and returns the value of
// the new field along with an error. A NewCalcFn can also modify values
// within data.
type NewCalcFn func(ts *chutils.TableDef, data chutils.Row, valid chutils.Valid, validate bool) (interface{}, error)
NewCalcFn defines the signature of a function that calculates a new field. Note that a NewCalcFn can also modify values within data.
type Reader ¶
type Reader struct {
// contains filtered or unexported fields
}
Reader is a struct that implements chutils.Input.
Note that the underlying reader r cannot be embedded, because both r.Read and Reader.Read are needed.
func NewReader ¶
func NewReader(rdr chutils.Input, newFields []*chutils.FieldDef, newCalcs []NewCalcFn) (*Reader, error)
NewReader creates a new Reader from:
- rdr, a base reader that satisfies chutils.Input.
- newFields, a slice that defines the additional fields.
- newCalcs, a slice of functions that populate the additional fields.
func (*Reader) CountLines ¶
CountLines returns the number of lines in the underlying reader, Reader.r
func (*Reader) Read ¶
func (rdr *Reader) Read(nTarget int, validate bool) (data []chutils.Row, valid []chutils.Valid, err error)
Read reads nTarget rows from the underlying reader -- Reader.r -- and adds calculated fields. Validation is performed if validate == true. Note: if validate == false, the values returned by r.Read are strings.
Example ¶
Example 1
/*
/data/input.csv
x,y
1.0,2.0
3.0,4.0
100.0, 100.0
*/
// Open the CSV located in the directory named by the "data" environment variable.
myFile := os.Getenv("data") + "/input.csv"
inFile, err := os.Open(myFile)
if err != nil {
	panic(err)
}
baseReader := file.NewReader("", ',', '\n', '"', 0, 1, 0, inFile, 0)
defer func() {
	// Report the error returned by Close itself; the outer err is unrelated
	// to Close and may well be nil at this point.
	if e := baseReader.Close(); e != nil {
		panic(e)
	}
}()
// initialize TableSpec
if e := baseReader.Init("x", chutils.MergeTree); e != nil {
	panic(e)
}
if e := baseReader.TableSpec().Impute(baseReader, 0, .95); e != nil {
	panic(e)
}
if err = baseReader.TableSpec().Check(); err != nil {
	panic(err)
}
// Definition of the new calculated field. Its legal range is set to [0, 100]
// below; the expected output shows the Missing value (-1) for the third row,
// whose product (10000) lies above HighLimit.
fd := &chutils.FieldDef{
	Name:        "product",
	ChSpec:      chutils.ChField{Base: chutils.ChFloat, Length: 64},
	Description: "The product of the x and y",
	Legal:       chutils.NewLegalValues(),
	Missing:     -1.0,
	Width:       0,
}
fd.Legal.LowLimit, fd.Legal.HighLimit = 0.0, 100.0
// Create slice with the new field
newFields := []*chutils.FieldDef{fd}
// Create slice of functions to calculate this
newCalcs := make([]NewCalcFn, 0)
newCalcs = append(newCalcs,
	func(td *chutils.TableDef, data chutils.Row, valid chutils.Valid, validate bool) (interface{}, error) {
		// this will fail if validate == false since they will be strings
		x, okx := data[0].(float64)
		y, oky := data[1].(float64)
		if !okx || !oky {
			return 0.0, chutils.Wrapper(chutils.ErrInput, "bad inputs to calculation of product")
		}
		return x * y, nil
	})
// This reader will include our new field "product"
reader, err := NewReader(baseReader, newFields, newCalcs)
if err != nil {
	panic(err)
}
// io.EOF is tolerated here rather than treated as a failure.
data, _, err := reader.Read(0, true)
if err != nil && err != io.EOF {
	panic(err)
}
fmt.Println(data)
Output: [[1 2 2] [3 4 12] [100 100 -1]]
Example (Additional) ¶
Example: Column Locations Unknown
// If we are unsure of where x and y might be in the CSV, we can find out from the TableSpec
myFile := os.Getenv("data") + "/input.csv"
inFile, err := os.Open(myFile)
if err != nil {
	panic(err)
}
baseReader := file.NewReader("", ',', '\n', '"', 0, 1, 0, inFile, 0)
defer func() {
	// Report the error returned by Close itself; the outer err is unrelated
	// to Close and may well be nil at this point.
	if e := baseReader.Close(); e != nil {
		panic(e)
	}
}()
// initialize TableSpec
if e := baseReader.Init("x", chutils.MergeTree); e != nil {
	panic(e)
}
if e := baseReader.TableSpec().Impute(baseReader, 0, .95); e != nil {
	panic(e)
}
// Check the imputed TableSpec, as the first example does.
if e := baseReader.TableSpec().Check(); e != nil {
	panic(e)
}
// Definition of the new calculated field. Its legal range is set to [0, 100]
// below; the expected output shows the Missing value (-1) for the third row,
// whose product (10000) lies above HighLimit.
fd := &chutils.FieldDef{
	Name:        "product",
	ChSpec:      chutils.ChField{Base: chutils.ChFloat, Length: 64},
	Description: "The product of the first two fields",
	Legal:       chutils.NewLegalValues(),
	Missing:     -1.0,
	Width:       0,
}
fd.Legal.LowLimit, fd.Legal.HighLimit = 0.0, 100.0
// Create slice with the new field
newFields := []*chutils.FieldDef{fd}
// Create slice of functions to calculate this
newCalcs := make([]NewCalcFn, 0)
newCalcs = append(newCalcs,
	func(td *chutils.TableDef, data chutils.Row, valid chutils.Valid, validate bool) (interface{}, error) {
		// if we don't know where x and y are in the file, we can get their indices
		indx, _, err := td.Get("x")
		if err != nil {
			panic(err)
		}
		indy, _, err := td.Get("y")
		if err != nil {
			panic(err)
		}
		// The type assertions fail if the values are not float64.
		x, okx := data[indx].(float64)
		y, oky := data[indy].(float64)
		if !okx || !oky {
			return 0.0, chutils.Wrapper(chutils.ErrInput, "bad inputs to calculation of product")
		}
		return x * y, nil
	})
// This reader will include our new field "product"
reader, err := NewReader(baseReader, newFields, newCalcs)
if err != nil {
	panic(err)
}
// io.EOF is tolerated here rather than treated as a failure.
data, _, err := reader.Read(0, true)
if err != nil && err != io.EOF {
	panic(err)
}
fmt.Println(data)
Output: [[1 2 2] [3 4 12] [100 100 -1]]