diff --git a/dataframe_subset.go b/dataframe_subset.go index b77f13cec6e8a1b8c0f36af3457b48c6cf784129..165c57ee3ee9cef1f51a30480033b76d5d22e0ab 100644 --- a/dataframe_subset.go +++ b/dataframe_subset.go @@ -1,6 +1,8 @@ package pandas -import "gitee.com/quant1x/pandas/stat" +import ( + "gitee.com/quant1x/pandas/stat" +) // Subset returns a subset of the rows of the original DataFrame based on the // Series subsetting indexes. @@ -41,3 +43,47 @@ func (self DataFrame) SelectRows(p stat.ScopeLimit) DataFrame { } return newDF } + +func (self DataFrame) Concat(dfb DataFrame) DataFrame { + if self.Err != nil { + return self + } + if dfb.Err != nil { + return dfb + } + + uniques := make(map[string]struct{}) + cols := []string{} + for _, t := range []DataFrame{self, dfb} { + for _, u := range t.Names() { + if _, ok := uniques[u]; !ok { + uniques[u] = struct{}{} + cols = append(cols, u) + } + } + } + + expandedSeries := make([]stat.Series, len(cols)) + for k, v := range cols { + aidx := findInStringSlice(v, self.Names()) + bidx := findInStringSlice(v, dfb.Names()) + + // aidx and bidx must not be -1 at the same time. + var a, b stat.Series + if aidx != -1 { + a = self.columns[aidx] + } else { + bb := dfb.columns[bidx] + a = NewSeries(bb.Type(), bb.Name(), make([]struct{}, self.nrows)) + + } + if bidx != -1 { + b = dfb.columns[bidx] + } else { + b = NewSeries(a.Type(), a.Name(), make([]struct{}, dfb.nrows)) + } + newSeries := a.Concat(b) + expandedSeries[k] = newSeries + } + return NewDataFrame(expandedSeries...) +} diff --git a/dataframe_test.go b/dataframe_test.go index 08023483aeb8caadef53b062997ce663773720be..51a051c51d6337a99c6c998d3b1750fb9a3c6060 100644 --- a/dataframe_test.go +++ b/dataframe_test.go @@ -34,3 +34,17 @@ func TestLoadStructs(t *testing.T) { df2 := LoadStructs(dataTags) fmt.Println(df2) } + +func TestDataFrame_Concat(t *testing.T) { + d1 := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12} + s1 := NewNDFrame[float64]("x", d1...) + df1 := NewDataFrame(s1) + d2 := []float64{101, 102} + s2 := NewNDFrame[float64]("x", d2...) + df2 := NewDataFrame(s2) + fmt.Println(df1) + fmt.Println(df2) + df3 := df1.Concat(df2) + fmt.Println(df1) + fmt.Println(df3) +} diff --git a/series_append.go b/series_append.go index f8351d8bf847c20b5c2f004824b575f47d51f658..9b1dc21640a54189bb906360d182de76829ad919 100644 --- a/series_append.go +++ b/series_append.go @@ -75,3 +75,9 @@ func (self *NDFrame) Append(values ...any) stat.Series { } return self } + +func (self *NDFrame) Concat(x stat.Series) stat.Series { + y := self.Copy() + y = y.Append(x.Values()) + return y +} diff --git a/series_apply.go b/series_apply.go index 6faf9f37e93197b29ce9286910baa3f890e548fa..d3ae17d80a1c3b97cc432099a571a59652fecea3 100644 --- a/series_apply.go +++ b/series_apply.go @@ -1,6 +1,9 @@ package pandas -import "reflect" +import ( + "gitee.com/quant1x/pandas/stat" + "reflect" +) func (self *NDFrame) Apply(f func(idx int, v any)) { vv := reflect.ValueOf(self.values) @@ -45,3 +48,27 @@ func (self *NDFrame) Logic(f func(idx int, v any) bool) []bool { } return x } + +func (self *NDFrame) Apply2(f func(idx int, v any) any, args ...bool) stat.Series { + inplace := false + if len(args) >= 1 { + inplace = args[0] + } + vv := reflect.ValueOf(self.values) + vk := vv.Kind() + switch vk { + case reflect.Invalid: // {interface} nil + //series.assign(idx, size, Nil2Float64) + case reflect.Slice, reflect.Array: + for i := 0; i < vv.Len(); i++ { + tv := vv.Index(i).Interface() + r := f(i, tv) + if inplace { + vv.Index(i).Set(reflect.ValueOf(r)) + } + } + default: + // 其它类型忽略 + } + return self +} diff --git a/series_test.go b/series_test.go index a58049659c814699fd298f4851062a62a60ff4eb..8af9d1f3b9eb93433ac6fa9ee4f2c45b0c59f31e 100644 --- a/series_test.go +++ b/series_test.go @@ -75,3 +75,20 @@ func TestRolling2(t *testing.T) { r2 := df.Col("x").Rolling(s2).Mean().Values() fmt.Println("序列化结果:", r2) } + +func TestSeriesConcat(t *testing.T) { + d1 := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12} + s1 := NewNDFrame[float64]("x", d1...) + d2 := []float64{101, 102} + s2 := NewNDFrame[float64]("x", d2...) + s21 := s2.Apply2(func(idx int, v any) any { + r := v.(float64) + return r * r + }, true) + fmt.Println(s21) + fmt.Println(s1) + fmt.Println(s2) + s3 := s1.Concat(s2) + fmt.Println(s1) + fmt.Println(s3) +} diff --git a/stat/ndarray.go b/stat/ndarray.go index 0679dd8168c1351bb9fa8c8c9d2b9c053692c223..fc59da72165ec8bfd2fc60b2f0767455c775dc45 100644 --- a/stat/ndarray.go +++ b/stat/ndarray.go @@ -162,7 +162,7 @@ func (self NDArray[T]) Apply(f func(idx int, v any)) { } // Apply2 提供可替换功能的apply方法, 默认不替换 -func (self NDArray[T]) Apply2(f func(idx int, v T) T, args ...bool) { +func (self NDArray[T]) Apply2(f func(idx int, v any) any, args ...bool) Series { inplace := false if len(args) >= 1 { inplace = args[0] @@ -170,9 +170,10 @@ func (self NDArray[T]) Apply2(f func(idx int, v T) T, args ...bool) { for i, v := range self { r := f(i, v) if inplace { - self[i] = r + self[i] = anyToGeneric[T](r) } } + return self } func (self NDArray[T]) Logic(f func(idx int, v any) bool) []bool { diff --git a/stat/ndarray_append.go b/stat/ndarray_append.go index 9f15818d2bb6d7301a85554141679c91eb410472..690036b9bfa82f102318ab21e7514145504b9d3c 100644 --- a/stat/ndarray_append.go +++ b/stat/ndarray_append.go @@ -93,3 +93,9 @@ func (self NDArray[T]) Append(values ...any) Series { } return self } + +func (self NDArray[T]) Concat(x Series) Series { + y := self.Copy() + y = y.Append(x.Values()) + return y +} diff --git a/stat/ndarray_append_test.go b/stat/ndarray_append_test.go new file mode 100644 index 0000000000000000000000000000000000000000..459a0f9c0fe4bd8756d753591d0e266d7b423b9c --- /dev/null +++ b/stat/ndarray_append_test.go @@ -0,0 +1,19 @@ +package stat + +import ( + "fmt" + "testing" +) + +func TestNDArray_Concat(t *testing.T) { + d1 := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12} + s1 := NewSeries(d1...) + fmt.Println(s1) + d2 := []float64{101, 102} + s2 := NewSeries(d2...) + fmt.Println(s2) + s3 := s1.Concat(s2) + fmt.Println(s1) + fmt.Println(s3) + +} diff --git a/stat/ndarray_test.go b/stat/ndarray_test.go index 83c456e9c3d0dc7e6c9b4d3937de8204c5fdad15..a98621cc5b448936cca1d6dcf7581b213f5fbfbb 100644 --- a/stat/ndarray_test.go +++ b/stat/ndarray_test.go @@ -71,8 +71,9 @@ func TestNDArray_Apply(t *testing.T) { s1 := NewSeries(d1...) fmt.Println(s1) s2 := s1.(NDArray[float64]) - s2.Apply2(func(idx int, v float64) float64 { - return v * v + s2.Apply2(func(idx int, v any) any { + f := anyToGeneric[float64](v) + return f * f }, true) fmt.Println(s2) } diff --git a/stat/series.go b/stat/series.go index 390f8690a673cfb19ed895b3fea9df3a92b1e34d..0a73a6258583af97f0687ae223c6cc524e305cd3 100644 --- a/stat/series.go +++ b/stat/series.go @@ -48,6 +48,9 @@ type Series interface { Select(r ScopeLimit) Series // Append 增加一批记录 Append(values ...any) Series + // Concat concatenates two series together. It will return a new Series with the + // combined elements of both Series. + Concat(x Series) Series // Records returns the elements of a Series as a []string Records() []string @@ -67,6 +70,8 @@ type Series interface { Rolling(param any) RollingAndExpandingMixin // Apply 接受一个回调函数 Apply(f func(idx int, v any)) + // Apply2 增加替换功能, 默认不替换 + Apply2(f func(idx int, v any) any, args ...bool) Series // Logic 逻辑处理 Logic(f func(idx int, v any) bool) []bool // EWM Provide exponentially weighted (EW) calculations.