From 1cd1ca4b02ab2a2f21e191dcac8b0cd78f8adb5d Mon Sep 17 00:00:00 2001 From: wangfeng Date: Mon, 20 Feb 2023 10:24:27 +0800 Subject: [PATCH 1/2] =?UTF-8?q?series=20=E6=96=B0=E5=A2=9Econcat=E6=96=B9?= =?UTF-8?q?=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- series_append.go | 6 ++++++ series_test.go | 12 ++++++++++++ stat/ndarray_append.go | 6 ++++++ stat/ndarray_append_test.go | 19 +++++++++++++++++++ stat/series.go | 3 +++ 5 files changed, 46 insertions(+) create mode 100644 stat/ndarray_append_test.go diff --git a/series_append.go b/series_append.go index f8351d8..9b1dc21 100644 --- a/series_append.go +++ b/series_append.go @@ -75,3 +75,9 @@ func (self *NDFrame) Append(values ...any) stat.Series { } return self } + +func (self *NDFrame) Concat(x stat.Series) stat.Series { + y := self.Copy() + y = y.Append(x.Values()) + return y +} diff --git a/series_test.go b/series_test.go index a580496..2582a5a 100644 --- a/series_test.go +++ b/series_test.go @@ -75,3 +75,15 @@ func TestRolling2(t *testing.T) { r2 := df.Col("x").Rolling(s2).Mean().Values() fmt.Println("序列化结果:", r2) } + +func TestSeriesConcat(t *testing.T) { + d1 := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12} + s1 := NewNDFrame[float64]("x", d1...) + d2 := []float64{101, 102} + s2 := NewNDFrame[float64]("x", d2...) + fmt.Println(s1) + fmt.Println(s2) + s3 := s1.Concat(s2) + fmt.Println(s1) + fmt.Println(s3) +} diff --git a/stat/ndarray_append.go b/stat/ndarray_append.go index 9f15818..690036b 100644 --- a/stat/ndarray_append.go +++ b/stat/ndarray_append.go @@ -93,3 +93,9 @@ func (self NDArray[T]) Append(values ...any) Series { } return self } + +func (self NDArray[T]) Concat(x Series) Series { + y := self.Copy() + y = y.Append(x.Values()) + return y +} diff --git a/stat/ndarray_append_test.go b/stat/ndarray_append_test.go new file mode 100644 index 0000000..459a0f9 --- /dev/null +++ b/stat/ndarray_append_test.go @@ -0,0 +1,19 @@ +package stat + +import ( + "fmt" + "testing" +) + +func TestNDArray_Concat(t *testing.T) { + d1 := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12} + s1 := NewSeries(d1...) + fmt.Println(s1) + d2 := []float64{101, 102} + s2 := NewSeries(d2...) + fmt.Println(s2) + s3 := s1.Concat(s2) + fmt.Println(s1) + fmt.Println(s3) + +} diff --git a/stat/series.go b/stat/series.go index 390f869..a66e9eb 100644 --- a/stat/series.go +++ b/stat/series.go @@ -48,6 +48,9 @@ type Series interface { Select(r ScopeLimit) Series // Append 增加一批记录 Append(values ...any) Series + // Concat concatenates two series together. It will return a new Series with the + // combined elements of both Series. + Concat(x Series) Series // Records returns the elements of a Series as a []string Records() []string -- Gitee From 5e2d84faf0df3473f8687e232593b7d98b6bbc48 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Mon, 20 Feb 2023 10:51:13 +0800 Subject: [PATCH 2/2] =?UTF-8?q?#I6G7FP=20dataframe=20&=20series=20?= =?UTF-8?q?=E5=AE=9E=E7=8E=B0concat=E6=96=B9=E6=B3=95=E5=92=8Capply2,=20?= =?UTF-8?q?=E6=94=AF=E6=8C=81=E6=9B=BF=E6=8D=A2=E5=85=83=E7=B4=A0=E7=9A=84?= =?UTF-8?q?=E5=80=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dataframe_subset.go | 48 +++++++++++++++++++++++++++++++++++++++++++- dataframe_test.go | 14 +++++++++++++ series_apply.go | 29 +++++++++++++++++++++++++- series_test.go | 5 +++++ stat/ndarray.go | 5 +++-- stat/ndarray_test.go | 5 +++-- stat/series.go | 2 ++ 7 files changed, 102 insertions(+), 6 deletions(-) diff --git a/dataframe_subset.go b/dataframe_subset.go index b77f13c..165c57e 100644 --- a/dataframe_subset.go +++ b/dataframe_subset.go @@ -1,6 +1,8 @@ package pandas -import "gitee.com/quant1x/pandas/stat" +import ( + "gitee.com/quant1x/pandas/stat" +) // Subset returns a subset of the rows of the original DataFrame based on the // Series subsetting indexes. @@ -41,3 +43,47 @@ func (self DataFrame) SelectRows(p stat.ScopeLimit) DataFrame { } return newDF } + +func (self DataFrame) Concat(dfb DataFrame) DataFrame { + if self.Err != nil { + return self + } + if dfb.Err != nil { + return dfb + } + + uniques := make(map[string]struct{}) + cols := []string{} + for _, t := range []DataFrame{self, dfb} { + for _, u := range t.Names() { + if _, ok := uniques[u]; !ok { + uniques[u] = struct{}{} + cols = append(cols, u) + } + } + } + + expandedSeries := make([]stat.Series, len(cols)) + for k, v := range cols { + aidx := findInStringSlice(v, self.Names()) + bidx := findInStringSlice(v, dfb.Names()) + + // aidx and bidx must not be -1 at the same time. + var a, b stat.Series + if aidx != -1 { + a = self.columns[aidx] + } else { + bb := dfb.columns[bidx] + a = NewSeries(bb.Type(), bb.Name(), make([]struct{}, self.nrows)) + + } + if bidx != -1 { + b = dfb.columns[bidx] + } else { + b = NewSeries(a.Type(), a.Name(), make([]struct{}, dfb.nrows)) + } + newSeries := a.Concat(b) + expandedSeries[k] = newSeries + } + return NewDataFrame(expandedSeries...) +} diff --git a/dataframe_test.go b/dataframe_test.go index 0802348..51a051c 100644 --- a/dataframe_test.go +++ b/dataframe_test.go @@ -34,3 +34,17 @@ func TestLoadStructs(t *testing.T) { df2 := LoadStructs(dataTags) fmt.Println(df2) } + +func TestDataFrame_Concat(t *testing.T) { + d1 := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12} + s1 := NewNDFrame[float64]("x", d1...) + df1 := NewDataFrame(s1) + d2 := []float64{101, 102} + s2 := NewNDFrame[float64]("x", d2...) + df2 := NewDataFrame(s2) + fmt.Println(df1) + fmt.Println(df2) + df3 := df1.Concat(df2) + fmt.Println(df1) + fmt.Println(df3) +} diff --git a/series_apply.go b/series_apply.go index 6faf9f3..d3ae17d 100644 --- a/series_apply.go +++ b/series_apply.go @@ -1,6 +1,9 @@ package pandas -import "reflect" +import ( + "gitee.com/quant1x/pandas/stat" + "reflect" +) func (self *NDFrame) Apply(f func(idx int, v any)) { vv := reflect.ValueOf(self.values) @@ -45,3 +48,27 @@ func (self *NDFrame) Logic(f func(idx int, v any) bool) []bool { } return x } + +func (self *NDFrame) Apply2(f func(idx int, v any) any, args ...bool) stat.Series { + inplace := false + if len(args) >= 1 { + inplace = args[0] + } + vv := reflect.ValueOf(self.values) + vk := vv.Kind() + switch vk { + case reflect.Invalid: // {interface} nil + //series.assign(idx, size, Nil2Float64) + case reflect.Slice, reflect.Array: + for i := 0; i < vv.Len(); i++ { + tv := vv.Index(i).Interface() + r := f(i, tv) + if inplace { + vv.Index(i).Set(reflect.ValueOf(r)) + } + } + default: + // 其它类型忽略 + } + return self +} diff --git a/series_test.go b/series_test.go index 2582a5a..8af9d1f 100644 --- a/series_test.go +++ b/series_test.go @@ -81,6 +81,11 @@ func TestSeriesConcat(t *testing.T) { s1 := NewNDFrame[float64]("x", d1...) d2 := []float64{101, 102} s2 := NewNDFrame[float64]("x", d2...) + s21 := s2.Apply2(func(idx int, v any) any { + r := v.(float64) + return r * r + }, true) + fmt.Println(s21) fmt.Println(s1) fmt.Println(s2) s3 := s1.Concat(s2) diff --git a/stat/ndarray.go b/stat/ndarray.go index 0679dd8..fc59da7 100644 --- a/stat/ndarray.go +++ b/stat/ndarray.go @@ -162,7 +162,7 @@ func (self NDArray[T]) Apply(f func(idx int, v any)) { } // Apply2 提供可替换功能的apply方法, 默认不替换 -func (self NDArray[T]) Apply2(f func(idx int, v T) T, args ...bool) { +func (self NDArray[T]) Apply2(f func(idx int, v any) any, args ...bool) Series { inplace := false if len(args) >= 1 { inplace = args[0] @@ -170,9 +170,10 @@ func (self NDArray[T]) Apply2(f func(idx int, v T) T, args ...bool) { for i, v := range self { r := f(i, v) if inplace { - self[i] = r + self[i] = anyToGeneric[T](r) } } + return self } func (self NDArray[T]) Logic(f func(idx int, v any) bool) []bool { diff --git a/stat/ndarray_test.go b/stat/ndarray_test.go index 83c456e..a98621c 100644 --- a/stat/ndarray_test.go +++ b/stat/ndarray_test.go @@ -71,8 +71,9 @@ func TestNDArray_Apply(t *testing.T) { s1 := NewSeries(d1...) fmt.Println(s1) s2 := s1.(NDArray[float64]) - s2.Apply2(func(idx int, v float64) float64 { - return v * v + s2.Apply2(func(idx int, v any) any { + f := anyToGeneric[float64](v) + return f * f }, true) fmt.Println(s2) } diff --git a/stat/series.go b/stat/series.go index a66e9eb..0a73a62 100644 --- a/stat/series.go +++ b/stat/series.go @@ -70,6 +70,8 @@ type Series interface { Rolling(param any) RollingAndExpandingMixin // Apply 接受一个回调函数 Apply(f func(idx int, v any)) + // Apply2 增加替换功能, 默认不替换 + Apply2(f func(idx int, v any) any, args ...bool) Series // Logic 逻辑处理 Logic(f func(idx int, v any) bool) []bool // EWM Provide exponentially weighted (EW) calculations. -- Gitee