]]>
]]>

GreenPlum notes

Queries

  • Q_UNIQ_2009_6
ERROR:  Unexpected internal error: Segment process received signal SIGSEGV  (seg4 slice2 localhost:50005 pid=4697)
ERROR:  GPDB detected segment reconfiguration. (cdbgang.c:3538)
  • Q_DAYSTAT_2009_7 - 455.247 s
  • Q_TOP20_PROJECTS_2009_7 - 769.497 s
  • Q_HOURPROJECTSTAT_es_2009_7_21 - 586.576 s
  • Q_NOPROJECTS_1_2009_7_14 - 456.620 s
  • Q_NOACCESS_SECOND_DAY - 8351.333 s
  • Q_DAYWEEKSTAT_de_2009_6 - 591.271 s
  • Q_PAGE_MONTHSTATS - 1789.938 s
  • Q_PAGES_PROJECT - 57736.971 s
  • Q_TOP50_de_2006_6_18
ERROR:  Lost connection to one or more segments - fault detector checking for segment failures. (seg0 slice3 localhost:50001 pid=7390)
ERROR:  GPDB detected segment reconfiguration. (cdbgang.c:3538)
  • Q_10000HITS - 1032.972 s
  • Q_PROJECT_AVG - 756.568 s
  • Q_HOURS_DROP - 228.632 s
  • Q_POINT_FACT - 126.774 s
  • Summary table - 15194.131 s
  • Q_DATE_RANGE - 112.073s
  • Q_PAGE_DATE_RANGE (first time -error) second time 4528.273 s
ERROR:  Lost connection to one or more segments - fault detector checking for segment failures. (seg0 slice4 localhost:50001 pid=11622)
ERROR:  GPDB detected segment reconfiguration. (cdbgang.c:3538)
  • Q_POINT_PAGE - 944.327 s
  • Q_HIT_SPIKE - 48507.006 s

Create tables

CREATE TABLE pages (  id int NOT NULL PRIMARY KEY, page varchar(1024) NOT NULL );

CREATE TABLE pagestat (
   date_id smallint  NOT NULL,
   project_id smallint  NOT NULL,
   page_id int NOT NULL,
   page_count int  NOT NULL )
WITH (appendonly=true, orientation=column, compresslevel=1, compresstype=QuickLZ);

CREATE TABLE pagestat_daily (
   page_id int NOT NULL,
   date_id_from smallint  NOT NULL,
   date_id_to smallint  NOT NULL,
   project_id smallint  NOT NULL,
   page_count int  NOT NULL )
WITH (appendonly=true, orientation=column, compresslevel=1, compresstype=QuickLZ);



CREATE TABLE datesinfo (
  id smallint NOT NULL,
  datestring char(11) NOT NULL,
  caldate date NOT NULL,
  calday smallint NOT NULL,
  calmonth smallint NOT NULL,
  calyear smallint NOT NULL,
  dayofweek smallint  NOT NULL,
  dayhour smallint  NOT NULL,
  PRIMARY KEY (id)
)

CREATE TABLE projects (
  id smallint NOT NULL,
  project varchar(50) NOT NULL,
  PRIMARY KEY (id)
)



gpfdist -d /data/vol2/wikistat/in/ -p 8081 -l /home/gpadmin/log


CREATE EXTERNAL TABLE ext_pages ( id int, 
   page varchar(1024) ) 
   LOCATION ('gpfdist://localhost:8081/pages.txt')
 FORMAT 'TEXT' (DELIMITER '|');

CREATE EXTERNAL TABLE ext_projects ( 
  id smallint,
  project varchar(50)
 ) 
   LOCATION ('gpfdist://localhost:8081/projects.txt')
 FORMAT 'TEXT' (DELIMITER '|');



CREATE EXTERNAL TABLE ext_pagestat (   date_id smallint , project_id smallint, page_id int ,page_count int )
LOCATION ('gpfdist://localhost:8081/200906.out') 
FORMAT 'TEXT' (DELIMITER '|');

CREATE EXTERNAL TABLE ext_pagestat_7 (   date_id smallint , project_id smallint, page_id int ,page_count int )
LOCATION ('gpfdist://localhost:8081/200907.out') 
FORMAT 'TEXT' (DELIMITER '|');

CREATE EXTERNAL TABLE ext_datesinfo (    id smallint,
  datestring char(11),
  caldate date,
  calday smallint,
  calmonth smallint,
  calyear smallint,
  dayofweek smallint,
  dayhour smallint
 )
LOCATION ('gpfdist://localhost:8081/datesinfo.txt') 
FORMAT 'TEXT' (DELIMITER '|');

INSERT INTO datesinfo SELECT * FROM ext_datesinfo;


INSERT INTO pages SELECT * FROM ext_pages;
INSERT 0 724533199
Time: 6935.685 s

INSERT INTO pagestat SELECT * FROM ext_pagestat;
INSERT 0 3453013109
Time: 3019.406 s

INSERT INTO pagestat SELECT * FROM ext_pagestat_7;                                                                       
INSERT 0 3442375618
Time: 2712.735 s


psql -d wikistat


wikistat=# CREATE TABLE pages (  id int NOT NULL PRIMARY KEY, page varchar(1024) NOT NULL );
NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "pages_pkey" for table "pages"
CREATE TABLE
wikistat=# CREATE TABLE pagestat (
wikistat(#    date_id smallint  NOT NULL,
wikistat(#    project_id smallint  NOT NULL,
wikistat(#    page_id int NOT NULL,
wikistat(#    page_count int  NOT NULL )
wikistat-# WITH (appendonly=true, orientation=column, compresslevel=1, compresstype=QuickLZ);
NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'date_id' as the Greenplum Database data distribution key for this table.
HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
CREATE TABLE

Case #2

CREATE TABLE pagestat (
wikistat(#    date_id smallint  NOT NULL,
wikistat(#    project_id smallint  NOT NULL,
wikistat(#    page_id int NOT NULL,
wikistat(#    page_count int  NOT NULL );

wikistat=# INSERT INTO pagestat SELECT * FROM ext_pagestat;
INSERT 0 3453013109
Time: 15454405.549 ms

wikistat=# CREATE INDEX idx_bmp_project ON pagestat USING BITMAP (project_id); 
CREATE INDEX
Time: 2331785.677 ms
wikistat=# CREATE INDEX idx_bmp_date ON pagestat USING BITMAP (date_id);              
CREATE INDEX
Time: 2318416.419 ms

(after creating two indexes)
wikistat=# INSERT INTO pagestat SELECT * FROM ext_pagestat_7;
INSERT 0 3442375618                                                                                                                 
Time: 88543676.530 ms 

Datasize: 184G data

Installation procedure

bash greenplum-db-3.3.4.0-build-8-SingleNodeEdition-RHEL5-x86_64.bin

/usr/local/greenplum-db-3.3.4.0

!!!! Please add the following to /etc/sysctl.conf
kernel.shmmax=500000000
kernel.shmmni=4096
kernel.shmall=4000000000
kernel.sem=250 64000 100 512
net.ipv4.tcp_tw_recycle=1

(it's known it may break InfiniDB and MonetDB)

#load changes:
sysctl -p

#install user and dirs
useradd gpadmin
passwd gpadmin

chown -R gpadmin /usr/local/greenplum-db
chgrp -R gpadmin /usr/local/greenplum-db

"source /usr/local/greenplum-db/greenplum_path.sh"
into
 /home/gpadmin/.bashrc

mkdir -p /data/vol1/engines/greenplum/data/master
chown gpadmin /data/vol1/engines/greenplum/data/master
chgrp gpadmin /data/vol1/engines/greenplum/data/master

mkdir -p /data/vol1/engines/greenplum/data/data1
chown gpadmin /data/vol1/engines/greenplum/data/data1
chgrp gpadmin /data/vol1/engines/greenplum/data/data1

put
echo "localhost" > /data/vol1/engines/greenplum/single_host_file
chown gpadmin single_host_file
chgrp gpadmin single_host_file

cp /usr/local/greenplum-db/docs/cli_help/gp_init_config_example /data/vol1/engines/greenplum/gp_init_config


vi /data/vol1/engines/greenplum/gp_init_config
ARRAY_NAME="PerconaTest"
MACHINE_LIST_FILE=/data/vol1/engines/greenplum/single_host_file
SEG_PREFIX=gp
PORT_BASE=50000
declare -a DATA_DIRECTORY=(/data/vol1/engines/greenplum/data/data1 /data/vol1/engines/greenplum/data/data1 /data/vol1/engines/greenplum/data/data1 /data/vol1/engines/greenplum/data/data1 /data/vol1/engines/greenplum/data/data1 /data/vol1/engines/greenplum/data/data1 /data/vol1/engines/greenplum/data/data1 /data/vol1/engines/greenplum/data/data1)
MASTER_HOSTNAME=localhost
MASTER_DIRECTORY=/data/vol1/engines/greenplum/data/master
MASTER_PORT=5432

chown gpadmin gp_init_config
chgrp gpadmin gp_init_config

ulimit -n 65535

!!!! install key authentification

sudo su - gpadmin
ssh-keygen
cat /home/gpadmin/.ssh/id_rsa.pub > /home/gpadmin/.ssh/authorized_keys
chmod 600 /home/gpadmin/.ssh/authorized_keys

(check keys work: ssh gpadmin@localhost, should not ask you password)

gpinitsystem -c  /data/vol1/engines/greenplum/gp_init_config

!!! if it finishes OK you can see 8 postgres processes + 1 master process


createdb -h localhost -p 5432 wikistat
 
benchmark/wikistat/greenplum.txt · Last modified: 2011/07/17 21:05 (external edit)
Except where otherwise noted, content on this wiki is licensed under the following license:CC Attribution-Noncommercial-Share Alike 3.0 Unported
Contact Us 24 Hours A Day
SupportContact us 24×7
Emergency? Contact us for help now!
Sales North America(888) 316-9775 or
(208) 473-2904
Sales
Europe
+44-208-133-0309 (UK)
0-800-051-8984 (UK)
0-800-181-0665 (GER)
Training(855) 55TRAIN or
(925) 271-5054

 

Share This
]]> ]]>