From 90f8da3de9869de2a8eb979be4ae3c0b8c6dda99 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Fri, 3 Feb 2023 14:26:44 +0800 Subject: [PATCH 1/8] =?UTF-8?q?=E6=B7=BB=E5=8A=A0range=E7=9A=84=E8=A7=86?= =?UTF-8?q?=E7=BA=BF=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- range.go | 166 ++++++++++++++++++++++++++++++++++++++++++++++++++ range_test.go | 86 ++++++++++++++++++++++++++ 2 files changed, 252 insertions(+) create mode 100644 range.go create mode 100644 range_test.go diff --git a/range.go b/range.go new file mode 100644 index 0000000..aac7de6 --- /dev/null +++ b/range.go @@ -0,0 +1,166 @@ +// Copyright 2018-20 PJ Engineering and Business Solutions Pty. Ltd. All rights reserved. + +package pandas + +import ( + "errors" + "fmt" +) + +// Range is used to specify a range. Both Start and End are inclusive. +// A nil value means no limit, so a Start of nil means 0 +// and an End of nil means no limit. +// The End value must always be equal to or larger than Start. +// Negative values are acceptable. A value of -2 means the second last row. +type Range struct { + Start *int + End *int +} + +// String implements Stringer interface. +func (r Range) String() string { + if r.Start == nil { + if r.End == nil { + return "Range:nil—nil" + } + return fmt.Sprintf("Range:nil—%d", *r.End) + } + if r.End == nil { + return fmt.Sprintf("Range:%d—nil", *r.Start) + } + return fmt.Sprintf("Range:%d—%d", *r.Start, *r.End) +} + +// NRows returns the number of rows contained by Range. +// If End is nil, then length must be provided. +func (r *Range) NRows(length ...int) (int, error) { + + if len(length) > 0 { + s, e, err := r.Limits(length[0]) + if err != nil { + return 0, err + } + + return e - s + 1, nil + } + + if r.End == nil { + return 0, errors.New("End is nil so length must be provided") + } + + var s int + + if r.Start != nil { + s = *r.Start + } + + if s < 0 || *r.End < 0 { + return 0, errors.New("range invalid") + } + + if *r.End < s { + return 0, errors.New("range invalid") + } + + return *r.End - s + 1, nil +} + +// Limits is used to return the start and end limits of a Range +// object for a given Dataframe or Series with length number of rows. +func (r *Range) Limits(length int) (s int, e int, _ error) { + + if length <= 0 { + return 0, 0, errors.New("limit undefined") + } + + if r.Start == nil { + s = 0 + } else { + if *r.Start < 0 { + // negative + s = length + *r.Start + } else { + s = *r.Start + } + } + + if r.End == nil { + e = length - 1 + } else { + if *r.End < 0 { + // negative + e = length + *r.End + } else { + e = *r.End + } + } + + if s < 0 || e < 0 { + return 0, 0, errors.New("range invalid") + } + + if s > e { + return 0, 0, errors.New("range invalid") + } + + if s >= length || e >= length { + return 0, 0, errors.New("range invalid") + } + + return +} + +// RangeFinite returns a Range that has a finite span. +func RangeFinite(start int, end ...int) Range { + r := Range{ + Start: &start, + } + if len(end) > 0 { + r.End = &end[0] + } + return r +} + +// IntsToRanges will convert an already (ascending) ordered list of ints to a slice of Ranges. +// +// Example: +// +// import "sort" +// ints := []int{2,4,5,6,8,10,11,45,46} +// sort.Ints(ints) +// +// fmt.Println(IntsToRanges(ints)) +// // Output: R{2,2}, R{4,6}, R{8,8}, R{10,11}, R{45,46} +func IntsToRanges(ints []int) []Range { + + out := []Range{} + +OUTER: + for i := 0; i < len(ints); i++ { + v1 := ints[i] + + j := i + 1 + for { + if j >= len(ints) { + // j doesn't exist + v2 := ints[j-1] + out = append(out, Range{Start: &v1, End: &v2}) + break OUTER + } else { + // j does exist + v2 := ints[j] + prevVal := ints[j-1] + + if (v2 != prevVal) && (v2 != prevVal+1) { + out = append(out, Range{Start: &v1, End: &prevVal}) + i = j - 1 + break + } + j++ + continue + } + } + } + + return out +} diff --git a/range_test.go b/range_test.go new file mode 100644 index 0000000..ba967d5 --- /dev/null +++ b/range_test.go @@ -0,0 +1,86 @@ +// Copyright 2018-20 PJ Engineering and Business Solutions Pty. Ltd. All rights reserved. + +package pandas + +import ( + "testing" +) + +type tcase struct { + Start *int + End *int + ExpN int + ExpS int + ExpE int +} + +func TestRange(t *testing.T) { + + vals := []int{0, 1, 2, 3} + + N := len(vals) + + i := func(i int) *int { + return &i + } + + tests := []tcase{ + { + Start: nil, + End: nil, + ExpN: 4, + ExpS: 0, + ExpE: 3, + }, + { + Start: i(1), + End: i(3), + ExpN: 3, + ExpS: 1, + ExpE: 3, + }, + { + Start: nil, + End: i(-1), + ExpN: 4, + ExpS: 0, + ExpE: 3, + }, + { + Start: nil, + End: i(-2), + ExpN: 3, + ExpS: 0, + ExpE: 2, + }, + { + Start: i(-3), + End: i(-2), + ExpN: 2, + ExpS: 1, + ExpE: 2, + }, + } + + for i, tc := range tests { + + rng := &Range{Start: tc.Start, End: tc.End} + + nrows, err := rng.NRows(N) + if err != nil { + panic(err) + } + if nrows != tc.ExpN { + t.Errorf("%d: |got: %v |expected: %v", i, nrows, tc.ExpN) + } + + s, e, err := rng.Limits(N) + if err != nil { + panic(err) + } + if s != tc.ExpS || e != tc.ExpE { + t.Errorf("%d: |got: %v,%v |expected: %v,%v", i, s, e, tc.ExpS, tc.ExpE) + } + } + +} -- Gitee From 94aaba520b6ec24098d6a41652c1944eaf9f8ad3 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Fri, 3 Feb 2023 15:20:55 +0800 Subject: [PATCH 2/8] =?UTF-8?q?abs=E5=A2=9E=E5=8A=A0=E6=B3=A8=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- stat/abs.go | 1 + 1 file changed, 1 insertion(+) diff --git a/stat/abs.go b/stat/abs.go index c3af367..3ea44ae 100644 --- a/stat/abs.go +++ b/stat/abs.go @@ -1,5 +1,6 @@ package stat +// Abs 泛型绝对值 func Abs[T StatType](x []T) []T { xlen := len(x) d := make([]T, xlen) -- Gitee From 1ca71a9447a9cbd38c08a3288c0f6eaf8cc90ea8 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Fri, 3 Feb 2023 16:03:22 +0800 Subject: [PATCH 3/8] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=85=A8=E5=B1=80?= =?UTF-8?q?=E7=9A=84=E7=A7=81=E6=9C=89=E5=8F=98=E9=87=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- series_generic.go | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/series_generic.go b/series_generic.go index 4ff8741..d07cec3 100644 --- a/series_generic.go +++ b/series_generic.go @@ -4,12 +4,19 @@ import ( "reflect" ) +// 初始化全局的私有变量 var ( - typeBool = reflect.TypeOf([]bool{}) - typeInt64 = reflect.TypeOf([]int64{}) - typeFloat32 = reflect.TypeOf([]float32{}) - typeFloat64 = reflect.TypeOf([]float64{}) - typeString = reflect.TypeOf([]string{}) + rawBool bool = true + typeBool = reflect.TypeOf([]bool{}) + rawInt32 int32 = int32(0) + typeInt32 = reflect.TypeOf([]int32{}) + rawInt64 int64 = int64(0) + typeInt64 = reflect.TypeOf([]int64{}) + rawFloat32 float32 = float32(0) + typeFloat32 = reflect.TypeOf([]float32{}) + rawFloat64 float64 = float64(0) + typeFloat64 = reflect.TypeOf([]float64{}) + typeString = reflect.TypeOf([]string{}) ) func CreateSeries(t Type, name string, v ...any) Series { -- Gitee From 2a84caf74ec36ad1883d6757e6cdaebce741ddf1 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Fri, 3 Feb 2023 16:03:52 +0800 Subject: [PATCH 4/8] =?UTF-8?q?=E5=A2=9E=E5=8A=A0select=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- series.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/series.go b/series.go index 94bd51a..1a52493 100644 --- a/series.go +++ b/series.go @@ -69,6 +69,8 @@ type Series interface { Max() any // Min 找出最小值 Min() any + // Select 选取一段记录 + Select(r Range) Series } // NewSeries 指定类型创建序列 -- Gitee From 6d73c43c2cafdd28f052eb990fcfcf70c9fff5bb Mon Sep 17 00:00:00 2001 From: wangfeng Date: Fri, 3 Feb 2023 16:32:41 +0800 Subject: [PATCH 5/8] =?UTF-8?q?=E7=8B=AC=E7=AB=8Bapply=E5=8A=9F=E8=83=BD?= =?UTF-8?q?=E5=87=BA=E6=9D=A5=E4=B8=80=E4=B8=AA=E6=BA=90=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- generic_apply.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 generic_apply.go diff --git a/generic_apply.go b/generic_apply.go new file mode 100644 index 0000000..cba37d3 --- /dev/null +++ b/generic_apply.go @@ -0,0 +1,24 @@ +package pandas + +import "reflect" + +func (self *NDFrame) apply(f func(idx int, v any)) { + vv := reflect.ValueOf(self.values) + vk := vv.Kind() + switch vk { + case reflect.Invalid: // {interface} nil + //series.assign(idx, size, Nil2Float64) + case reflect.Slice: // 切片, 不定长 + for i := 0; i < vv.Len(); i++ { + tv := vv.Index(i).Interface() + f(i, tv) + } + case reflect.Array: // 数组, 定长 + for i := 0; i < vv.Len(); i++ { + tv := vv.Index(i).Interface() + f(i, tv) + } + default: + // 其它类型忽略 + } +} -- Gitee From f2928e905d40f9f7459a243694309f9aca0af036 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Fri, 3 Feb 2023 16:33:13 +0800 Subject: [PATCH 6/8] =?UTF-8?q?=E7=8B=AC=E7=AB=8Bmax=E5=8A=9F=E8=83=BD?= =?UTF-8?q?=E5=87=BA=E6=9D=A5=E4=B8=80=E4=B8=AA=E6=BA=90=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- generic_max.go | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 generic_max.go diff --git a/generic_max.go b/generic_max.go new file mode 100644 index 0000000..b5fcdd7 --- /dev/null +++ b/generic_max.go @@ -0,0 +1,56 @@ +package pandas + +func (self *NDFrame) Max() any { + values := self.Values() + switch rows := values.(type) { + case []string: + max := "" + i := 0 + for idx, iv := range rows { + if StringIsNaN(iv) { + continue + } + if iv > max { + max = iv + i += 1 + } + _ = idx + } + if i > 0 { + return max + } + return StringNaN + case []int64: + max := int64(0) + //i := 0 + for idx, iv := range rows { + if Float64IsNaN(float64(iv)) { + continue + } + if iv > max { + max = iv + //i = idx + } + _ = idx + } + return max + case []float64: + max := float64(0) + i := 0 + for idx, iv := range rows { + if Float64IsNaN(iv) { + continue + } + if iv > max { + max = iv + i += 1 + } + _ = idx + } + if i > 0 { + return max + } + return Nil2Float64 + } + return Nil2Float64 +} -- Gitee From c7debbaeab23966effe3ff9491fc2d4a948ca089 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Fri, 3 Feb 2023 16:33:29 +0800 Subject: [PATCH 7/8] =?UTF-8?q?=E7=8B=AC=E7=AB=8Bmin=E5=8A=9F=E8=83=BD?= =?UTF-8?q?=E5=87=BA=E6=9D=A5=E4=B8=80=E4=B8=AA=E6=BA=90=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- generic_min.go | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 generic_min.go diff --git a/generic_min.go b/generic_min.go new file mode 100644 index 0000000..a3da84f --- /dev/null +++ b/generic_min.go @@ -0,0 +1,65 @@ +package pandas + +func (self *NDFrame) Min() any { + values := self.Values() + switch rows := values.(type) { + case []string: + min := "" + i := 0 + for idx, iv := range rows { + if StringIsNaN(iv) { + continue + } else if i < 1 { + min = iv + i += 1 + } + if iv < min { + min = iv + i += 1 + } + _ = idx + } + if i > 0 { + return min + } + return StringNaN + case []int64: + min := int64(0) + i := 0 + for idx, iv := range rows { + if Float64IsNaN(float64(iv)) { + continue + } else if i < 1 { + min = iv + i += 1 + } + if iv < min { + min = iv + i += 1 + } + _ = idx + } + return min + case []float64: + min := float64(0) + i := 0 + for idx, iv := range rows { + if Float64IsNaN(iv) { + continue + } else if i < 1 { + min = iv + i += 1 + } + if iv < min { + min = iv + i += 1 + } + _ = idx + } + if i > 0 { + return min + } + return Nil2Float64 + } + return Nil2Float64 +} -- Gitee From 46e5adf92b70a320a0c44438904ddc0aac8dc3c3 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Fri, 3 Feb 2023 16:36:50 +0800 Subject: [PATCH 8/8] =?UTF-8?q?#I6CCAE=20=E6=94=AF=E6=8C=81=E8=8C=83?= =?UTF-8?q?=E5=9B=B4=E9=80=89=E6=8B=A9,=20=E4=BB=8E=E5=89=8D=E5=92=8C?= =?UTF-8?q?=E4=BB=8E=E5=90=8E=E4=B8=A4=E4=B8=AA=E6=96=B9=E5=90=91,=20?= =?UTF-8?q?=E6=94=AF=E6=8C=81=E9=BB=98=E8=AE=A4=E5=80=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dataframe_subset.go | 10 +++ generic.go | 211 -------------------------------------------- generic_range.go | 123 ++++++++++++++++++++++++++ generic_test.go | 5 ++ 4 files changed, 138 insertions(+), 211 deletions(-) create mode 100644 generic_range.go diff --git a/dataframe_subset.go b/dataframe_subset.go index ea0b0d9..db634bd 100644 --- a/dataframe_subset.go +++ b/dataframe_subset.go @@ -21,3 +21,13 @@ func (df DataFrame) Subset(start, end int) DataFrame { nrows: nrows, } } + +// 选择一段记录 +func (df DataFrame) Select(p Range) DataFrame { + serieses := []Series{} + for i := range df.columns { + serieses = append(serieses, df.columns[i].Select(p)) + } + newDF := DataFrame{columns: serieses} + return newDF +} diff --git a/generic.go b/generic.go index f7e732c..2411e38 100644 --- a/generic.go +++ b/generic.go @@ -178,27 +178,6 @@ func (self *NDFrame) Empty() Series { return &frame } -func (self *NDFrame) apply(f func(idx int, v any)) { - vv := reflect.ValueOf(self.values) - vk := vv.Kind() - switch vk { - case reflect.Invalid: // {interface} nil - //series.assign(idx, size, Nil2Float64) - case reflect.Slice: // 切片, 不定长 - for i := 0; i < vv.Len(); i++ { - tv := vv.Index(i).Interface() - f(i, tv) - } - case reflect.Array: // 数组, 定长 - for i := 0; i < vv.Len(); i++ { - tv := vv.Index(i).Interface() - f(i, tv) - } - default: - // 其它类型忽略 - } -} - func (self *NDFrame) Records() []string { ret := make([]string, self.Len()) self.apply(func(idx int, v any) { @@ -207,77 +186,6 @@ func (self *NDFrame) Records() []string { return ret } -func (self *NDFrame) Copy() Series { - vlen := self.Len() - return self.Subset(0, vlen) -} - -func (self *NDFrame) Subset(start, end int, opt ...any) Series { - // 默认不copy - var __optCopy bool = false - if len(opt) > 0 { - // 第一个参数为是否copy - if _cp, ok := opt[0].(bool); ok { - __optCopy = _cp - } - } - var vs any - var rows int - switch values := self.values.(type) { - case []bool: - subset := values[start:end] - rows = len(subset) - if !__optCopy { - vs = subset - } else { - _vs := make([]bool, 0) - _vs = append(_vs, subset...) - vs = _vs - } - case []string: - subset := values[start:end] - rows = len(subset) - if !__optCopy { - vs = subset - } else { - _vs := make([]string, 0) - _vs = append(_vs, subset...) - vs = _vs - } - case []int64: - subset := values[start:end] - rows = len(subset) - if !__optCopy { - vs = subset - } else { - _vs := make([]int64, 0) - _vs = append(_vs, subset...) - vs = _vs - } - case []float64: - subset := values[start:end] - rows = len(subset) - if !__optCopy { - vs = subset - } else { - _vs := make([]float64, 0) - _vs = append(_vs, subset...) - vs = _vs - } - } - frame := NDFrame{ - formatter: self.formatter, - name: self.name, - type_: self.type_, - nilCount: 0, - rows: rows, - values: vs, - } - var s Series - s = &frame - return s -} - func (self *NDFrame) Repeat(x any, repeats int) Series { switch values := self.values.(type) { case []bool: @@ -378,122 +286,3 @@ func (self *NDFrame) FillNa(v any, inplace bool) { } } } - -func (self *NDFrame) Max() any { - values := self.Values() - switch rows := values.(type) { - case []string: - max := "" - i := 0 - for idx, iv := range rows { - if StringIsNaN(iv) { - continue - } - if iv > max { - max = iv - i += 1 - } - _ = idx - } - if i > 0 { - return max - } - return StringNaN - case []int64: - max := int64(0) - //i := 0 - for idx, iv := range rows { - if Float64IsNaN(float64(iv)) { - continue - } - if iv > max { - max = iv - //i = idx - } - _ = idx - } - return max - case []float64: - max := float64(0) - i := 0 - for idx, iv := range rows { - if Float64IsNaN(iv) { - continue - } - if iv > max { - max = iv - i += 1 - } - _ = idx - } - if i > 0 { - return max - } - return Nil2Float64 - } - return Nil2Float64 -} - -func (self *NDFrame) Min() any { - values := self.Values() - switch rows := values.(type) { - case []string: - min := "" - i := 0 - for idx, iv := range rows { - if StringIsNaN(iv) { - continue - } else if i < 1 { - min = iv - i += 1 - } - if iv < min { - min = iv - i += 1 - } - _ = idx - } - if i > 0 { - return min - } - return StringNaN - case []int64: - min := int64(0) - i := 0 - for idx, iv := range rows { - if Float64IsNaN(float64(iv)) { - continue - } else if i < 1 { - min = iv - i += 1 - } - if iv < min { - min = iv - i += 1 - } - _ = idx - } - return min - case []float64: - min := float64(0) - i := 0 - for idx, iv := range rows { - if Float64IsNaN(iv) { - continue - } else if i < 1 { - min = iv - i += 1 - } - if iv < min { - min = iv - i += 1 - } - _ = idx - } - if i > 0 { - return min - } - return Nil2Float64 - } - return Nil2Float64 -} diff --git a/generic_range.go b/generic_range.go new file mode 100644 index 0000000..e6eca6b --- /dev/null +++ b/generic_range.go @@ -0,0 +1,123 @@ +package pandas + +import ( + gc "github.com/huandu/go-clone" + "reflect" +) + +func (self *NDFrame) Copy() Series { + vlen := self.Len() + return self.Subset(0, vlen, true) +} + +func (self *NDFrame) Subset(start, end int, opt ...any) Series { + // 默认不copy + var __optCopy bool = false + if len(opt) > 0 { + // 第一个参数为是否copy + if _cp, ok := opt[0].(bool); ok { + __optCopy = _cp + } + } + var vs any + var rows int + vv := reflect.ValueOf(self.values) + vk := vv.Kind() + switch vk { + case reflect.Slice, reflect.Array: // 切片和数组同样的处理逻辑 + vs = vv.Slice(start, end).Interface() + rows = vv.Len() + if __optCopy && rows > 0 { + vs = gc.Clone(vs) + } + rows = vv.Len() + frame := NDFrame{ + formatter: self.formatter, + name: self.name, + type_: self.type_, + nilCount: 0, + rows: rows, + values: vs, + } + return &frame + default: + // 其它类型忽略 + } + return self.Empty() +} + +func (self *NDFrame) oldSubset(start, end int, opt ...any) Series { + // 默认不copy + var __optCopy bool = false + if len(opt) > 0 { + // 第一个参数为是否copy + if _cp, ok := opt[0].(bool); ok { + __optCopy = _cp + } + } + var vs any + var rows int + switch values := self.values.(type) { + case []bool: + subset := values[start:end] + rows = len(subset) + if !__optCopy { + vs = subset + } else { + _vs := make([]bool, 0) + _vs = append(_vs, subset...) + vs = _vs + } + case []string: + subset := values[start:end] + rows = len(subset) + if !__optCopy { + vs = subset + } else { + _vs := make([]string, 0) + _vs = append(_vs, subset...) + vs = _vs + } + case []int64: + subset := values[start:end] + rows = len(subset) + if !__optCopy { + vs = subset + } else { + _vs := make([]int64, 0) + _vs = append(_vs, subset...) + vs = _vs + } + case []float64: + subset := values[start:end] + rows = len(subset) + if !__optCopy { + vs = subset + } else { + _vs := make([]float64, 0) + _vs = append(_vs, subset...) + vs = _vs + } + } + frame := NDFrame{ + formatter: self.formatter, + name: self.name, + type_: self.type_, + nilCount: 0, + rows: rows, + values: vs, + } + var s Series + s = &frame + return s +} + +// 选取一段记录 +func (self *NDFrame) Select(r Range) Series { + start, end, err := r.Limits(self.Len()) + if err != nil { + return nil + } + series := self.Subset(start, end+1) + return series +} diff --git a/generic_test.go b/generic_test.go index 71fdbaf..c2bca3e 100644 --- a/generic_test.go +++ b/generic_test.go @@ -31,6 +31,11 @@ func TestNDFrameNew(t *testing.T) { d1 := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12} nd1 := NewNDFrame[float64]("x", d1...) fmt.Println(nd1) + + r := RangeFinite(-1) + ndr1 := nd1.Select(r) + fmt.Println(ndr1.Values()) + fmt.Println(nd1.Records()) nd11 := nd1.Subset(1, 2, true) fmt.Println(nd11.Records()) -- Gitee